Upload 3 files
Browse files- app.py +122 -847
- joblib_model/xwoba_model.joblib +3 -0
- requirements.txt +1 -7
app.py
CHANGED
@@ -1,848 +1,123 @@
|
|
1 |
-
import polars as pl
|
2 |
-
import
|
3 |
-
import
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
The
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
#
|
121 |
-
|
122 |
-
|
123 |
-
'11':'AAA',
|
124 |
-
'14':'A (FSL)',}
|
125 |
-
|
126 |
-
|
127 |
-
level_dict_file = {
|
128 |
-
'11':'aaa',
|
129 |
-
'14':'a',}
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
year_list = [2024]
|
134 |
-
|
135 |
-
|
136 |
-
from shiny import App, reactive, ui, render
|
137 |
-
from shiny.ui import h2, tags
|
138 |
-
|
139 |
-
# Define the UI layout for the app
|
140 |
-
# Define the UI layout for the app
|
141 |
-
app_ui = ui.page_sidebar(
|
142 |
-
# Sidebar content
|
143 |
-
ui.sidebar(
|
144 |
-
# Row for selecting season and level
|
145 |
-
ui.row(
|
146 |
-
ui.column(6, ui.input_select('year_input', 'Select Season', year_list, selected=2024)),
|
147 |
-
ui.column(6, ui.input_select('level_input', 'Select Level', level_dict)),
|
148 |
-
),
|
149 |
-
# Row for the action button to get player list
|
150 |
-
ui.row(ui.input_action_button("player_button", "Get Player List", class_="btn-primary")),
|
151 |
-
# Row for selecting the player
|
152 |
-
ui.row(ui.column(12, ui.output_ui('player_select_ui', 'Select Player'))),
|
153 |
-
|
154 |
-
ui.row(
|
155 |
-
ui.column(6, ui.input_switch("switch", "Custom Team?", False)),
|
156 |
-
ui.column(6, ui.input_select('logo_select', 'Select Custom Logo', image_dict_flip, multiple=False))
|
157 |
-
),
|
158 |
-
|
159 |
-
# Row for the action button to generate plot
|
160 |
-
ui.row(ui.input_action_button("generate_plot", "Generate Plot", class_="btn-primary")),
|
161 |
-
width="400px"
|
162 |
-
),
|
163 |
-
|
164 |
-
# Main content area with header and tabs
|
165 |
-
ui.tags.div(
|
166 |
-
{"style": "width:90%;margin: 0 auto;max-width: 1600px;"},
|
167 |
-
ui.tags.style(
|
168 |
-
"""
|
169 |
-
h4 {
|
170 |
-
margin-top: 1em;font-size:35px;
|
171 |
-
}
|
172 |
-
h2{
|
173 |
-
font-size:25px;
|
174 |
-
}
|
175 |
-
"""
|
176 |
-
),
|
177 |
-
ui.tags.h4("TJStats"),
|
178 |
-
ui.tags.i("Baseball Analytics and Visualizations"),
|
179 |
-
ui.markdown("""<a href='https://x.com/TJStats'>Follow me on Twitter</a><sup>1</sup>"""),
|
180 |
-
ui.markdown("""<a href='https://www.patreon.com/tj_stats'>Support me on Patreon for Access to 2024 Apps</a><sup>1</sup>"""),
|
181 |
-
ui.markdown("### MiLB Statcast Batting Summaries"),
|
182 |
-
ui.markdown("""This Shiny App allows you to generate Baseball Savant-style percentile bars for MiLB players in the 2024 Season.
|
183 |
-
Currently, MiLB Statcast is only available for AAA and A (Florida State League) level.
|
184 |
-
"""),
|
185 |
-
ui.markdown("""
|
186 |
-
For ease of sharing, you can right-click (desktop) or press+hold (mobile) to save/copy the image.
|
187 |
-
"""),
|
188 |
-
|
189 |
-
# Main content area with tabs
|
190 |
-
ui.navset_tab(
|
191 |
-
ui.nav_panel("Batter Summary",
|
192 |
-
ui.output_text("status_batter"),
|
193 |
-
ui.output_plot('batter_plot', width='1200px', height='1200px')
|
194 |
-
),
|
195 |
-
ui.nav_panel("Pitcher Summary",
|
196 |
-
ui.output_text("status_pitcher"),
|
197 |
-
ui.output_plot('pitcher_plot', width='1200px', height='1200px')
|
198 |
-
),
|
199 |
-
id="tabset"
|
200 |
-
)
|
201 |
-
)
|
202 |
-
)
|
203 |
-
|
204 |
-
def server(input, output, session):
|
205 |
-
@render.ui
|
206 |
-
@reactive.event(input.player_button,input.level_input,input.year_input,input.tabset, ignore_none=False)
|
207 |
-
def player_select_ui():
|
208 |
-
if input.tabset() == "Batter Summary":
|
209 |
-
#Get the list of pitchers for the selected level and season
|
210 |
-
df_pitcher_info = scrape.get_players(sport_id=int(input.level_input()), season=int(input.year_input())).filter(
|
211 |
-
~pl.col("position").is_in(['P','TWP'])).sort("name")
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
# Create a dictionary of pitcher IDs and names
|
216 |
-
batter_dict_pos = dict(zip(df_pitcher_info['player_id'], df_pitcher_info['position']))
|
217 |
-
|
218 |
-
year = int(input.year_input())
|
219 |
-
sport_id = int(input.level_input())
|
220 |
-
batter_summary = pl.read_csv(f'data/statcast/batter_summary_{level_dict_file[str(sport_id)]}_{year}.csv').sort('batter_name',descending=False)
|
221 |
-
# Map elements in Polars DataFrame from a dictionary
|
222 |
-
batter_summary = batter_summary.with_columns(
|
223 |
-
pl.col("batter_id").map_elements(lambda x: batter_dict_pos.get(x, x)).alias("position")
|
224 |
-
)
|
225 |
-
|
226 |
-
|
227 |
-
batter_dict_pos = dict(zip(batter_summary['batter_id'], batter_summary['batter_name']))
|
228 |
-
# Create a dictionary of pitcher IDs and names
|
229 |
-
batter_dict = dict(zip(batter_summary['batter_id'], batter_summary['batter_name'] + ' - ' + batter_summary['position']))
|
230 |
-
|
231 |
-
# Return a select input for choosing a pitcher
|
232 |
-
return ui.input_select("batter_id", "Select Batter", batter_dict, selectize=True)
|
233 |
-
|
234 |
-
if input.tabset() == "Pitcher Summary":
|
235 |
-
#Get the list of pitchers for the selected level and season
|
236 |
-
df_pitcher_info = scrape.get_players(sport_id=int(input.level_input()), season=int(input.year_input())).filter(
|
237 |
-
pl.col("position").is_in(['P','TWP'])).sort("name")
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
# Create a dictionary of pitcher IDs and names
|
242 |
-
batter_dict_pos = dict(zip(df_pitcher_info['player_id'], df_pitcher_info['position']))
|
243 |
-
|
244 |
-
year = int(input.year_input())
|
245 |
-
sport_id = int(input.level_input())
|
246 |
-
batter_summary = pl.read_csv(f'data/statcast/pitcher_summary_{level_dict_file[str(sport_id)]}_{year}.csv').sort('pitcher_name',descending=False)
|
247 |
-
# Map elements in Polars DataFrame from a dictionary
|
248 |
-
batter_summary = batter_summary.with_columns(
|
249 |
-
pl.col("pitcher_id").map_elements(lambda x: batter_dict_pos.get(x, x)).alias("position")
|
250 |
-
)
|
251 |
-
|
252 |
-
|
253 |
-
batter_dict_pos = dict(zip(batter_summary['pitcher_id'], batter_summary['pitcher_name']))
|
254 |
-
# Create a dictionary of pitcher IDs and names
|
255 |
-
batter_dict = dict(zip(batter_summary['pitcher_id'], batter_summary['pitcher_name'] + ' - ' + batter_summary['position']))
|
256 |
-
|
257 |
-
# Return a select input for choosing a pitcher
|
258 |
-
return ui.input_select("pitcher_id", "Select Batter", batter_dict, selectize=True)
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
@output
|
263 |
-
@render.plot
|
264 |
-
@reactive.event(input.generate_plot, ignore_none=False)
|
265 |
-
def batter_plot():
|
266 |
-
|
267 |
-
|
268 |
-
merged_dict = {
|
269 |
-
"woba_percent": { "format": '.3f', "percentile_flip": False, "stat_title": "wOBA" },
|
270 |
-
"xwoba_percent": { "format": '.3f', "percentile_flip": False, "stat_title": "xwOBA" },
|
271 |
-
"launch_speed": { "format": '.1f', "percentile_flip": False, "stat_title": "Average EV"},
|
272 |
-
"launch_speed_90": { "format": '.1f', "percentile_flip": False, "stat_title": "90th% EV"},
|
273 |
-
"max_launch_speed": { "format": '.1f', "percentile_flip": False, "stat_title": "Max EV"},
|
274 |
-
"barrel_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "Barrel%" },
|
275 |
-
"hard_hit_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "Hard-Hit%" },
|
276 |
-
"sweet_spot_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "LA Sweet-Spot%" },
|
277 |
-
#"zone_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "Zone%" },
|
278 |
-
"zone_swing_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "Z-Swing%" },
|
279 |
-
"chase_percent": { "format": '.1%', "percentile_flip": True, "stat_title": "O-Swing%" },
|
280 |
-
"whiff_rate": { "format": '.1%', "percentile_flip": True, "stat_title": "Whiff%" },
|
281 |
-
"zone_contact_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "Z-Contact%" },
|
282 |
-
"k_percent": { "format": '.1%', "percentile_flip": True, "stat_title": "K%" },
|
283 |
-
"bb_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "BB%" },
|
284 |
-
"pull_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "Pull%" },
|
285 |
-
"pulled_fly_ball_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "Pull FB%" },
|
286 |
-
}
|
287 |
-
# Show progress/loading notification
|
288 |
-
with ui.Progress(min=0, max=1) as p:
|
289 |
-
|
290 |
-
def draw_baseball_savant_percentiles(new_player_metrics, new_player_percentiles, colors=None,
|
291 |
-
sport_id=None,
|
292 |
-
year_input=None):
|
293 |
-
"""
|
294 |
-
Draw Baseball Savant-style percentile bars with proper alignment and scaling.
|
295 |
-
|
296 |
-
:param new_player_metrics: DataFrame containing new player metrics.
|
297 |
-
:param new_player_percentiles: DataFrame containing new player percentiles.
|
298 |
-
:param colors: List of colors for bars (optional, red/blue default).
|
299 |
-
"""
|
300 |
-
# Extract player information
|
301 |
-
batter_id = new_player_metrics['batter_id'][0]
|
302 |
-
player_name = batter_name_id[batter_id]
|
303 |
-
stats = [merged_dict[x]['stat_title'] for x in merged_dict.keys()]
|
304 |
-
|
305 |
-
# Calculate percentiles and values
|
306 |
-
percentiles = [int((1 - x) * 100) if merged_dict[stat]["percentile_flip"] else int(x * 100) for x, stat in zip(new_player_percentiles.select(merged_dict.keys()).to_numpy()[0], merged_dict.keys())]
|
307 |
-
percentiles = np.clip(percentiles, 1, 100)
|
308 |
-
values = [str(f'{x:{merged_dict[stat]["format"]}}').strip('%') for x, stat in zip(new_player_metrics.select(merged_dict.keys()).to_numpy()[0], merged_dict.keys())]
|
309 |
-
|
310 |
-
# Get team logo URL
|
311 |
-
logo_url = image_dict[team_dict[player_team_dict[batter_id]]]
|
312 |
-
|
313 |
-
# Create a custom colormap
|
314 |
-
color_list = ['#3661AD', '#B4CFD1', '#D82129']
|
315 |
-
cmap = LinearSegmentedColormap.from_list("custom_cmap", color_list)
|
316 |
-
norm = Normalize(vmin=0.1, vmax=0.9)
|
317 |
-
norm_percentiles = norm(percentiles / 100)
|
318 |
-
colors = [cmap(p) for p in norm_percentiles]
|
319 |
-
|
320 |
-
# Figure setup
|
321 |
-
num_stats = len(stats)
|
322 |
-
bar_height = 4.5
|
323 |
-
spacing = 1
|
324 |
-
fig_height = (bar_height + spacing) * num_stats
|
325 |
-
fig = plt.figure(figsize=(12, 12))
|
326 |
-
gs = GridSpec(6, 5, height_ratios=[0.1, 1.5, 0.9, 0.9, 7.6, 0.1], width_ratios=[0.2, 1.5, 7, 1.5, 0.2])
|
327 |
-
|
328 |
-
# Define subplots
|
329 |
-
ax_title = fig.add_subplot(gs[1, 2])
|
330 |
-
ax_table = fig.add_subplot(gs[2, :])
|
331 |
-
ax_fv_table = fig.add_subplot(gs[3, :])
|
332 |
-
ax_fv_table.axis('off')
|
333 |
-
ax = fig.add_subplot(gs[4, :])
|
334 |
-
ax_logo = fig.add_subplot(gs[1, 3])
|
335 |
-
|
336 |
-
ax.set_xlim(-1, 99)
|
337 |
-
ax.set_ylim(-1, 99)
|
338 |
-
ax.set_aspect("equal")
|
339 |
-
ax.axis("off")
|
340 |
-
|
341 |
-
# Draw each bar
|
342 |
-
for i, (stat, percentile, value, color) in enumerate(zip(stats, percentiles, values, colors)):
|
343 |
-
y = fig_height - (i + 1) * (bar_height + spacing)
|
344 |
-
ax.add_patch(patches.Rectangle((0, y + bar_height / 4), 100, bar_height / 2, color="#C7DCDC", lw=0))
|
345 |
-
ax.add_patch(patches.Rectangle((0, y), percentile, bar_height, color=color, lw=0))
|
346 |
-
circle_y = y + bar_height - bar_height / 2
|
347 |
-
circle = plt.Circle((percentile, circle_y), bar_height / 2, color=color, ec='white', lw=1.5, zorder=10)
|
348 |
-
ax.add_patch(circle)
|
349 |
-
fs = 14
|
350 |
-
ax.text(percentile, circle_y, f"{percentile}", ha="center", va="center", fontsize=10, color='white', zorder=10, fontweight='bold')
|
351 |
-
ax.text(-5, y + bar_height / 2, stat, ha="right", va="center", fontsize=fs)
|
352 |
-
ax.text(115, y + bar_height / 2, str(value), ha="right", va="center", fontsize=fs, zorder=5)
|
353 |
-
if i < len(stats) and i > 0:
|
354 |
-
ax.hlines(y=y + bar_height + spacing / 2, color='#399098', linestyle=(0, (5, 5)), linewidth=1, xmin=-33, xmax=0)
|
355 |
-
ax.hlines(y=y + bar_height + spacing / 2, color='#399098', linestyle=(0, (5, 5)), linewidth=1, xmin=100, xmax=115)
|
356 |
-
|
357 |
-
# Draw vertical lines for 10%, 50%, and 90% with labels
|
358 |
-
for x, label, align, color in zip([10, 50, 90], ["Poor", "Average", "Great"], ['center', 'center', 'center'], color_list):
|
359 |
-
ax.axvline(x=x, ymin=0, ymax=1, color='#FFF', linestyle='-', lw=1, zorder=1, alpha=0.5)
|
360 |
-
ax.text(x, fig_height + 4, label, ha=align, va='center', fontsize=12, fontweight='bold', color=color)
|
361 |
-
triangle = patches.RegularPolygon((x, fig_height + 1), 3, radius=1, orientation=0, color=color, zorder=2)
|
362 |
-
ax.add_patch(triangle)
|
363 |
-
|
364 |
-
# # Title
|
365 |
-
# ax_title.set_ylim(0, 1)
|
366 |
-
# ax_title.text(0.5, 0.5, f"{player_name} - {player_position_dict[batter_id]}\nPercentile Rankings - 2024 AAA", ha="center", va="center", fontsize=24)
|
367 |
-
# ax_title.axis("off")
|
368 |
-
player_bio(batter_id, ax=ax_title, sport_id=sport_id, year_input=year_input)
|
369 |
-
|
370 |
-
# Add team logo
|
371 |
-
#response = requests.get(logo_url)
|
372 |
-
if input.switch():
|
373 |
-
response = requests.get(input.logo_select())
|
374 |
-
else:
|
375 |
-
response = requests.get(logo_url)
|
376 |
-
img = Image.open(BytesIO(response.content))
|
377 |
-
ax_logo.imshow(img)
|
378 |
-
ax_logo.axis("off")
|
379 |
-
ax.axis('equal')
|
380 |
-
|
381 |
-
# Metrics data table
|
382 |
-
metrics_data = {
|
383 |
-
"Pitches": new_player_metrics['pitches'][0],
|
384 |
-
"PA": new_player_metrics['pa'][0],
|
385 |
-
"BIP": new_player_metrics['bip'][0],
|
386 |
-
"HR": f"{new_player_metrics['home_run'][0]:.0f}",
|
387 |
-
"AVG": f"{new_player_metrics['avg'][0]:.3f}",
|
388 |
-
"OBP": f"{new_player_metrics['obp'][0]:.3f}",
|
389 |
-
"SLG": f"{new_player_metrics['slg'][0]:.3f}",
|
390 |
-
"OPS": f"{new_player_metrics['obp'][0] + new_player_metrics['slg'][0]:.3f}",
|
391 |
-
}
|
392 |
-
df_table = pd.DataFrame(metrics_data, index=[0])
|
393 |
-
ax_table.axis('off')
|
394 |
-
table = ax_table.table(cellText=df_table.values, colLabels=df_table.columns, cellLoc='center', loc='bottom', bbox=[0.07, 0, 0.86, 1])
|
395 |
-
for key, cell in table.get_celld().items():
|
396 |
-
if key[0] == 0:
|
397 |
-
cell.set_text_props(fontweight='bold')
|
398 |
-
table.auto_set_font_size(False)
|
399 |
-
table.set_fontsize(12)
|
400 |
-
table.scale(1, 1.5)
|
401 |
-
|
402 |
-
# Additional subplots for spacing
|
403 |
-
ax_top = fig.add_subplot(gs[0, :])
|
404 |
-
ax_bot = fig.add_subplot(gs[-1, :])
|
405 |
-
ax_top.axis('off')
|
406 |
-
ax_bot.axis('off')
|
407 |
-
ax_bot.text(0.05, 2, "By: Thomas Nestico (@TJStats)", ha="left", va="center", fontsize=14)
|
408 |
-
ax_bot.text(0.95, 2, "Data: MLB, Fangraphs", ha="right", va="center", fontsize=14)
|
409 |
-
fig.subplots_adjust(left=0.01, right=0.99, top=0.99, bottom=0.01)
|
410 |
-
|
411 |
-
# Player headshot
|
412 |
-
ax_headshot = fig.add_subplot(gs[1, 1])
|
413 |
-
try:
|
414 |
-
url = f'https://img.mlbstatic.com/mlb-photos/image/upload/c_fill,g_auto/w_640/v1/people/{batter_id}/headshot/milb/current.png'
|
415 |
-
response = requests.get(url)
|
416 |
-
img = Image.open(BytesIO(response.content))
|
417 |
-
ax_headshot.set_xlim(0, 1)
|
418 |
-
ax_headshot.set_ylim(0, 1)
|
419 |
-
ax_headshot.imshow(img, extent=[1/6, 5/6, 0, 1], origin='upper')
|
420 |
-
except PIL.UnidentifiedImageError:
|
421 |
-
ax_headshot.axis('off')
|
422 |
-
#return
|
423 |
-
ax_headshot.axis('off')
|
424 |
-
ax_table.set_title('Season Summary', style='italic')
|
425 |
-
|
426 |
-
# Fangraphs scouting grades table
|
427 |
-
print(batter_id)
|
428 |
-
|
429 |
-
if batter_id not in dict_mlb_fg.keys():
|
430 |
-
ax_fv_table.text(x=0.5, y=0.5, s='No Scouting Data', style='italic', ha='center', va='center', fontsize=20, bbox=dict(facecolor='white', alpha=1, pad=10))
|
431 |
-
return
|
432 |
-
df_fv_table = df_prospects[(df_prospects['minorMasterId'] == dict_mlb_fg[batter_id])][['cFV', 'Hit', 'Game', 'Raw', 'Spd', 'Fld']].reset_index(drop=True)
|
433 |
-
ax_fv_table.axis('off')
|
434 |
-
if df_fv_table.empty:
|
435 |
-
ax_fv_table.text(x=0.5, y=0.5, s='No Scouting Data', style='italic', ha='center', va='center', fontsize=20, bbox=dict(facecolor='white', alpha=1, pad=10))
|
436 |
-
return
|
437 |
-
df_fv_table.columns = ['FV', 'Hit', 'Game', 'Raw', 'Spd', 'Fld']
|
438 |
-
table_fv = ax_fv_table.table(cellText=df_fv_table.values, colLabels=df_fv_table.columns, cellLoc='center', loc='bottom', bbox=[0.07, 0, 0.86, 1])
|
439 |
-
for key, cell in table_fv.get_celld().items():
|
440 |
-
if key[0] == 0:
|
441 |
-
cell.set_text_props(fontweight='bold')
|
442 |
-
table_fv.auto_set_font_size(False)
|
443 |
-
table_fv.set_fontsize(12)
|
444 |
-
table_fv.scale(1, 1.5)
|
445 |
-
ax_fv_table.set_title('Fangraphs Scouting Grades', style='italic')
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
#plt.show()
|
450 |
-
|
451 |
-
|
452 |
-
def calculate_new_player_percentiles(player_id, new_player_metrics, player_summary_filtered):
|
453 |
-
"""
|
454 |
-
Calculate percentiles for a new player's metrics.
|
455 |
-
|
456 |
-
:param player_id: ID of the player.
|
457 |
-
:param new_player_metrics: DataFrame containing new player metrics.
|
458 |
-
:param player_summary_filtered: Filtered player summary DataFrame.
|
459 |
-
:return: DataFrame containing new player percentiles.
|
460 |
-
"""
|
461 |
-
filtered_summary_clone = player_summary_filtered[['batter_id'] + stat_list].filter(pl.col('batter_id') != player_id).clone()
|
462 |
-
combined_data = pl.concat([filtered_summary_clone, new_player_metrics], how="vertical").to_pandas()
|
463 |
-
combined_percentiles = pl.DataFrame(pd.concat([combined_data['batter_id'], combined_data[stat_list].rank(pct=True)], axis=1))
|
464 |
-
new_player_percentiles = combined_percentiles.filter(pl.col('batter_id') == player_id)
|
465 |
-
return new_player_percentiles
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
p.set(message="Generating plot", detail="This may take a while...")
|
470 |
-
|
471 |
-
|
472 |
-
p.set(0.3, "Gathering data...")
|
473 |
-
|
474 |
-
# Example: New player's metrics
|
475 |
-
year = int(input.year_input())
|
476 |
-
sport_id = int(input.level_input())
|
477 |
-
batter_id = int(input.batter_id())
|
478 |
-
|
479 |
-
|
480 |
-
df_player = scrape.get_players(sport_id=sport_id,season=year)
|
481 |
-
batter_name_id = dict(zip(df_player['player_id'],df_player['name']))
|
482 |
-
player_team_dict = dict(zip(df_player['player_id'],df_player['team']))
|
483 |
-
player_position_dict = dict(zip(df_player['player_id'],df_player['position']))
|
484 |
-
|
485 |
-
|
486 |
-
batter_summary = pl.read_csv(f'data/statcast/batter_summary_{level_dict_file[str(sport_id)]}_{year}.csv')
|
487 |
-
df_prospects = pd.read_csv(f'data/prospects/prospects_{year}.csv')
|
488 |
-
df_rosters = pd.read_csv(f'data/rosters/fangraphs_rosters_{year}.csv')
|
489 |
-
df_small = df_rosters[['minorbamid','minormasterid']].dropna()
|
490 |
-
dict_mlb_fg=dict(zip(df_small['minorbamid'].astype(int),df_small['minormasterid']))
|
491 |
-
|
492 |
-
|
493 |
-
|
494 |
-
|
495 |
-
batter_summary_filter = batter_summary.filter((pl.col('pa') >= 300) & (pl.col('launch_speed') >= 0))
|
496 |
-
stat_list = batter_summary.columns[2:]
|
497 |
-
batter_summary_filter_pd = batter_summary_filter.to_pandas()
|
498 |
-
new_player_metrics = batter_summary.filter(pl.col('batter_id') == batter_id)[['batter_id'] + stat_list]
|
499 |
-
|
500 |
-
# Get percentiles for the new player
|
501 |
-
new_player_percentiles = calculate_new_player_percentiles(batter_id, new_player_metrics, batter_summary_filter)
|
502 |
-
|
503 |
-
p.set(0.6, "Creating plot...")
|
504 |
-
# Draw Baseball Savant-style percentile bars
|
505 |
-
draw_baseball_savant_percentiles(new_player_metrics=new_player_metrics,
|
506 |
-
new_player_percentiles=new_player_percentiles,
|
507 |
-
sport_id=sport_id,
|
508 |
-
year_input=year)
|
509 |
-
|
510 |
-
@output
|
511 |
-
@render.plot
|
512 |
-
@reactive.event(input.generate_plot, ignore_none=False)
|
513 |
-
def pitcher_plot():
|
514 |
-
merged_dict = {
|
515 |
-
"avg_start_speed_ff": { "format": '.1f', "percentile_flip": False, "stat_title": "Fastball Velocity" },
|
516 |
-
"extension": { "format": '.1f', "percentile_flip": False, "stat_title": "Extension" },
|
517 |
-
"woba_percent": { "format": '.3f', "percentile_flip": True, "stat_title": "wOBA" },
|
518 |
-
"xwoba_percent": { "format": '.3f', "percentile_flip": True, "stat_title": "xwOBA" },
|
519 |
-
"launch_speed": { "format": '.1f', "percentile_flip": True, "stat_title": "Average EV"},
|
520 |
-
"barrel_percent": { "format": '.1%', "percentile_flip": True, "stat_title": "Barrel%" },
|
521 |
-
"hard_hit_percent": { "format": '.1%', "percentile_flip": True, "stat_title": "Hard-Hit%" },
|
522 |
-
"whiff_rate": { "format": '.1%', "percentile_flip": False, "stat_title": "Whiff%" },
|
523 |
-
"zone_contact_percent": { "format": '.1%', "percentile_flip": True, "stat_title": "Z-Contact%" },
|
524 |
-
"zone_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "Zone%" },
|
525 |
-
"chase_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "O-Swing%" },
|
526 |
-
"csw_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "CSW%" },
|
527 |
-
"k_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "K%" },
|
528 |
-
"bb_percent": { "format": '.1%', "percentile_flip": True, "stat_title": "BB%" },
|
529 |
-
"k_minus_bb_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "K - BB%" },
|
530 |
-
"ground_ball_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "GB%" },
|
531 |
-
}
|
532 |
-
|
533 |
-
with ui.Progress(min=0, max=1) as p:
|
534 |
-
|
535 |
-
def draw_baseball_savant_percentiles(new_player_metrics, new_player_percentiles, colors=None,
|
536 |
-
sport_id=None,
|
537 |
-
year_input=None):
|
538 |
-
"""
|
539 |
-
Draw Baseball Savant-style percentile bars with proper alignment and scaling.
|
540 |
-
|
541 |
-
:param new_player_metrics: DataFrame containing new player metrics.
|
542 |
-
:param new_player_percentiles: DataFrame containing new player percentiles.
|
543 |
-
:param colors: List of colors for bars (optional, red/blue default).
|
544 |
-
"""
|
545 |
-
# Extract player information
|
546 |
-
pitcher_id = new_player_metrics['pitcher_id'][0]
|
547 |
-
player_name = pitcher_name_id[pitcher_id]
|
548 |
-
stats = [merged_dict[x]['stat_title'] for x in merged_dict.keys()]
|
549 |
-
|
550 |
-
# Calculate percentiles and values
|
551 |
-
percentiles = [int((1 - x) * 100) if merged_dict[stat]["percentile_flip"] else int(x * 100) for x, stat in zip(new_player_percentiles.select(merged_dict.keys()).to_numpy()[0], merged_dict.keys())]
|
552 |
-
percentiles = np.clip(percentiles, 1, 100)
|
553 |
-
values = [str(f'{x:{merged_dict[stat]["format"]}}').strip('%') for x, stat in zip(new_player_metrics.select(merged_dict.keys()).to_numpy()[0], merged_dict.keys())]
|
554 |
-
|
555 |
-
# Get team logo URL
|
556 |
-
logo_url = image_dict[team_dict[player_team_dict[pitcher_id]]]
|
557 |
-
|
558 |
-
# Create a custom colormap
|
559 |
-
color_list = ['#3661AD', '#B4CFD1', '#D82129']
|
560 |
-
cmap = LinearSegmentedColormap.from_list("custom_cmap", color_list)
|
561 |
-
norm = Normalize(vmin=0.1, vmax=0.9)
|
562 |
-
norm_percentiles = norm(percentiles / 100)
|
563 |
-
colors = [cmap(p) for p in norm_percentiles]
|
564 |
-
|
565 |
-
# Figure setup
|
566 |
-
num_stats = len(stats)
|
567 |
-
bar_height = 4.4
|
568 |
-
spacing = 0.7
|
569 |
-
fig_height = (bar_height + spacing) * num_stats
|
570 |
-
fig = plt.figure(figsize=(12, 12))
|
571 |
-
gs = GridSpec(7, 5, height_ratios=[0.05, 1.5, 0.75, 0.75,0.75, 7.7, 0.1], width_ratios=[0.2, 1.5, 7, 1.5, 0.2])
|
572 |
-
|
573 |
-
# Define subplots
|
574 |
-
ax_title = fig.add_subplot(gs[1, 2])
|
575 |
-
ax_table = fig.add_subplot(gs[2, :])
|
576 |
-
ax_fv_table = fig.add_subplot(gs[3, :])
|
577 |
-
ax_fv_table.axis('off')
|
578 |
-
ax_stuff = fig.add_subplot(gs[4, :])
|
579 |
-
ax = fig.add_subplot(gs[5, :])
|
580 |
-
ax_logo = fig.add_subplot(gs[1, 3])
|
581 |
-
|
582 |
-
ax.set_xlim(-1, 99)
|
583 |
-
ax.set_ylim(-1, 99)
|
584 |
-
ax.set_aspect("equal")
|
585 |
-
ax.axis("off")
|
586 |
-
|
587 |
-
# Draw each bar
|
588 |
-
for i, (stat, percentile, value, color) in enumerate(zip(stats, percentiles, values, colors)):
|
589 |
-
y = fig_height - (i + 1) * (bar_height + spacing)
|
590 |
-
ax.add_patch(patches.Rectangle((0, y + bar_height / 4), 100, bar_height / 2, color="#C7DCDC", lw=0))
|
591 |
-
ax.add_patch(patches.Rectangle((0, y), percentile, bar_height, color=color, lw=0))
|
592 |
-
circle_y = y + bar_height - bar_height / 2
|
593 |
-
circle = plt.Circle((percentile, circle_y), bar_height / 2, color=color, ec='white', lw=1.5, zorder=10)
|
594 |
-
ax.add_patch(circle)
|
595 |
-
fs = 14
|
596 |
-
ax.text(percentile, circle_y, f"{percentile}", ha="center", va="center", fontsize=10, color='white', zorder=10, fontweight='bold')
|
597 |
-
ax.text(-5, y + bar_height / 2, stat, ha="right", va="center", fontsize=fs)
|
598 |
-
ax.text(115, y + bar_height / 2, str(value), ha="right", va="center", fontsize=fs, zorder=5)
|
599 |
-
if i < len(stats) and i > 0:
|
600 |
-
ax.hlines(y=y + bar_height + spacing / 2, color='#399098', linestyle=(0, (5, 5)), linewidth=1, xmin=-33, xmax=0)
|
601 |
-
ax.hlines(y=y + bar_height + spacing / 2, color='#399098', linestyle=(0, (5, 5)), linewidth=1, xmin=100, xmax=115)
|
602 |
-
|
603 |
-
# Draw vertical lines for 10%, 50%, and 90% with labels
|
604 |
-
for x, label, align, color in zip([10, 50, 90], ["Poor", "Average", "Great"], ['center', 'center', 'center'], color_list):
|
605 |
-
ax.axvline(x=x, ymin=0, ymax=1, color='#FFF', linestyle='-', lw=1, zorder=1, alpha=0.5)
|
606 |
-
ax.text(x, fig_height + 4, label, ha=align, va='center', fontsize=12, fontweight='bold', color=color)
|
607 |
-
triangle = patches.RegularPolygon((x, fig_height + 1), 3, radius=1, orientation=0, color=color, zorder=2)
|
608 |
-
ax.add_patch(triangle)
|
609 |
-
|
610 |
-
# # Title
|
611 |
-
# ax_title.set_ylim(0, 1)
|
612 |
-
# ax_title.text(0.5, 0.5, f"{player_name} - {player_position_dict[pitcher_id]}\nPercentile Rankings - 2024 AAA", ha="center", va="center", fontsize=24)
|
613 |
-
# ax_title.axis("off")
|
614 |
-
player_bio(pitcher_id, ax=ax_title, sport_id=sport_id, year_input=year_input)
|
615 |
-
|
616 |
-
# Add team logo
|
617 |
-
#response = requests.get(logo_url)
|
618 |
-
#######if input.switch():
|
619 |
-
######## response = requests.get(input.logo_select())
|
620 |
-
######else:
|
621 |
-
response = requests.get(logo_url)
|
622 |
-
img = Image.open(BytesIO(response.content))
|
623 |
-
ax_logo.imshow(img)
|
624 |
-
ax_logo.axis("off")
|
625 |
-
ax.axis('equal')
|
626 |
-
lg_dict = {
|
627 |
-
11:'all',
|
628 |
-
14:10
|
629 |
-
}
|
630 |
-
levelt = {
|
631 |
-
11:1,
|
632 |
-
14:4
|
633 |
-
}
|
634 |
-
|
635 |
-
|
636 |
-
fg_api = f'https://www.fangraphs.com/api/leaders/minor-league/data?pos=all&level={levelt[sport_id]}&lg={lg_dict[sport_id]}&stats=pit&qual=0&type=2&team=&season=2024&seasonEnd=2024&org=&ind=0&splitTeam=false'
|
637 |
-
response = requests.get(fg_api)
|
638 |
-
data = response.json()
|
639 |
-
df_fg = pl.DataFrame(data)
|
640 |
-
if pitcher_id not in dict_mlb_fg.keys():
|
641 |
-
#ax_fv_table.text(x=0.5, y=0.5, s='No Scouting Data', style='italic', ha='center', va='center', fontsize=20, bbox=dict(facecolor='white', alpha=1, pad=10))
|
642 |
-
metrics_data = {
|
643 |
-
"Pitches": new_player_metrics['pitches'][0],
|
644 |
-
"PA": new_player_metrics['pa'][0],
|
645 |
-
"BIP": new_player_metrics['bip'][0],
|
646 |
-
"HR": f"{new_player_metrics['home_run'][0]:.0f}",
|
647 |
-
"K": f"{new_player_metrics['k'][0]:.0f}",
|
648 |
-
"BB": f"{new_player_metrics['bb'][0]:.0f}",
|
649 |
-
}
|
650 |
-
else:
|
651 |
-
df_fg_filter = df_fg.filter(pl.col('minormasterid') == dict_mlb_fg[pitcher_id])
|
652 |
-
# Metrics data table
|
653 |
-
metrics_data = {
|
654 |
-
"G": f"{df_fg_filter['G'][0]:.0f}",
|
655 |
-
"IP": f"{df_fg_filter['IP'][0]:.1f}",
|
656 |
-
"Pitches": f"{new_player_metrics['pitches'][0]:.0f}",
|
657 |
-
"PA": f"{df_fg_filter['TBF'][0]:.0f}",
|
658 |
-
"BIP": new_player_metrics['bip'][0],
|
659 |
-
"ERA": f"{df_fg_filter['ERA'][0]:.2f}",
|
660 |
-
"FIP": f"{df_fg_filter['FIP'][0]:.2f}",
|
661 |
-
"WHIP": f"{df_fg_filter['WHIP'][0]:.2f}",
|
662 |
-
}
|
663 |
-
df_table = pd.DataFrame(metrics_data, index=[0])
|
664 |
-
ax_table.axis('off')
|
665 |
-
table = ax_table.table(cellText=df_table.values, colLabels=df_table.columns, cellLoc='center', loc='bottom', bbox=[0.07, 0, 0.86, 1])
|
666 |
-
for key, cell in table.get_celld().items():
|
667 |
-
if key[0] == 0:
|
668 |
-
cell.set_text_props(fontweight='bold')
|
669 |
-
table.auto_set_font_size(False)
|
670 |
-
table.set_fontsize(12)
|
671 |
-
table.scale(1, 1.5)
|
672 |
-
|
673 |
-
# Additional subplots for spacing
|
674 |
-
ax_top = fig.add_subplot(gs[0, :])
|
675 |
-
ax_bot = fig.add_subplot(gs[-1, :])
|
676 |
-
ax_top.axis('off')
|
677 |
-
ax_bot.axis('off')
|
678 |
-
ax_bot.text(0.05, 2, "By: Thomas Nestico (@TJStats)", ha="left", va="center", fontsize=14)
|
679 |
-
ax_bot.text(0.95, 2, "Data: MLB, Fangraphs", ha="right", va="center", fontsize=14)
|
680 |
-
|
681 |
-
|
682 |
-
# Player headshot
|
683 |
-
ax_headshot = fig.add_subplot(gs[1, 1])
|
684 |
-
try:
|
685 |
-
url = f'https://img.mlbstatic.com/mlb-photos/image/upload/c_fill,g_auto/w_640/v1/people/{pitcher_id}/headshot/milb/current.png'
|
686 |
-
response = requests.get(url)
|
687 |
-
img = Image.open(BytesIO(response.content))
|
688 |
-
ax_headshot.set_xlim(0, 1)
|
689 |
-
ax_headshot.set_ylim(0, 1)
|
690 |
-
ax_headshot.imshow(img, extent=[1/6, 5/6, 0, 1], origin='upper')
|
691 |
-
except PIL.UnidentifiedImageError:
|
692 |
-
ax_headshot.axis('off')
|
693 |
-
#return
|
694 |
-
ax_headshot.axis('off')
|
695 |
-
ax_table.set_title('Season Summary', style='italic')
|
696 |
-
|
697 |
-
# Fangraphs scouting grades table
|
698 |
-
print(pitcher_id)
|
699 |
-
|
700 |
-
if pitcher_id not in dict_mlb_fg.keys():
|
701 |
-
ax_fv_table.text(x=0.5, y=0.5, s='No Scouting Data', style='italic', ha='center', va='center', fontsize=20, bbox=dict(facecolor='white', alpha=1, pad=10))
|
702 |
-
#return
|
703 |
-
try:
|
704 |
-
df_fv_table = df_prospects[(df_prospects['minorMasterId'] == dict_mlb_fg[pitcher_id])][['cFV','FB', 'SL', 'CB', 'CH', 'SPL', 'CT','CMD']].dropna(axis=1).reset_index(drop=True)
|
705 |
-
except KeyError:
|
706 |
-
df_fv_table = pd.DataFrame()
|
707 |
-
# ax_fv_table.text(x=0.5, y=0.5, s='No Scouting Data', style='italic', ha='center', va='center', fontsize=20, bbox=dict(facecolor='white', alpha=1, pad=10))
|
708 |
-
#return
|
709 |
-
ax_fv_table.axis('off')
|
710 |
-
if df_fv_table.empty:
|
711 |
-
ax_fv_table.text(x=0.5, y=0.5, s='No Scouting Data', style='italic', ha='center', va='center', fontsize=20, bbox=dict(facecolor='white', alpha=1, pad=10))
|
712 |
-
#return
|
713 |
-
else:
|
714 |
-
df_fv_table.columns = ['FV']+[x.upper() for x in df_fv_table.columns[1:]]
|
715 |
-
table_fv = ax_fv_table.table(cellText=df_fv_table.values, colLabels=df_fv_table.columns, cellLoc='center', loc='bottom', bbox=[0.07, 0, 0.86, 1])
|
716 |
-
for key, cell in table_fv.get_celld().items():
|
717 |
-
if key[0] == 0:
|
718 |
-
cell.set_text_props(fontweight='bold')
|
719 |
-
table_fv.auto_set_font_size(False)
|
720 |
-
table_fv.set_fontsize(12)
|
721 |
-
table_fv.scale(1, 1.5)
|
722 |
-
ax_fv_table.set_title('Fangraphs Scouting Grades', style='italic')
|
723 |
-
|
724 |
-
|
725 |
-
# df_stuff_filter = df_stuff.filter(pl.col('pitcher_id')==pitcher_id)
|
726 |
-
|
727 |
-
stuff_table = ax_stuff.table(cellText=[df_stuff_filter['tj_stuff_plus']],
|
728 |
-
colLabels=df_stuff_filter['pitch_type'],
|
729 |
-
cellLoc='center',
|
730 |
-
loc='center', bbox=[0.07, 0, 0.86, 1])
|
731 |
-
stuff_table.auto_set_font_size(False)
|
732 |
-
stuff_table.set_fontsize(12)
|
733 |
-
stuff_table.scale(1, 1.5)
|
734 |
-
ax_stuff.axis('off')
|
735 |
-
ax_stuff.set_title('tjStuff+', style='italic')
|
736 |
-
for key, cell in stuff_table.get_celld().items():
|
737 |
-
if key[0] == 0:
|
738 |
-
cell.set_text_props(fontweight='bold')
|
739 |
-
|
740 |
-
# Color the stuff_table values based on the cmap defined
|
741 |
-
for (i, j), cell in stuff_table.get_celld().items():
|
742 |
-
if i == 0:
|
743 |
-
cell.set_text_props(fontweight='bold')
|
744 |
-
else:
|
745 |
-
norm = Normalize(vmin=90, vmax=110)
|
746 |
-
value = float(cell.get_text().get_text())
|
747 |
-
color = cmap(norm(value))
|
748 |
-
cell.set_facecolor(color)
|
749 |
-
#cell.set_text_props(color='white' if value < 100 else 'black')
|
750 |
-
|
751 |
-
|
752 |
-
|
753 |
-
|
754 |
-
|
755 |
-
fig.subplots_adjust(left=0.01, right=0.99, top=0.99, bottom=0.01)
|
756 |
-
|
757 |
-
|
758 |
-
|
759 |
-
|
760 |
-
|
761 |
-
def calculate_new_player_percentiles(player_id, new_player_metrics, player_summary_filtered):
|
762 |
-
"""
|
763 |
-
Calculate percentiles for a new player's metrics.
|
764 |
-
|
765 |
-
:param player_id: ID of the player.
|
766 |
-
:param new_player_metrics: DataFrame containing new player metrics.
|
767 |
-
:param player_summary_filtered: Filtered player summary DataFrame.
|
768 |
-
:return: DataFrame containing new player percentiles.
|
769 |
-
"""
|
770 |
-
filtered_summary_clone = player_summary_filtered[['pitcher_id'] + stat_list].filter(pl.col('pitcher_id') != player_id).clone()
|
771 |
-
combined_data = pl.concat([filtered_summary_clone, new_player_metrics], how="vertical").to_pandas()
|
772 |
-
combined_percentiles = pl.DataFrame(pd.concat([combined_data['pitcher_id'], combined_data[stat_list].rank(pct=True)], axis=1))
|
773 |
-
new_player_percentiles = combined_percentiles.filter(pl.col('pitcher_id') == player_id)
|
774 |
-
return new_player_percentiles
|
775 |
-
|
776 |
-
p.set(message="Generating plot", detail="This may take a while...")
|
777 |
-
|
778 |
-
|
779 |
-
p.set(0.3, "Gathering data...")
|
780 |
-
|
781 |
-
|
782 |
-
df_teams = scrape.get_teams()
|
783 |
-
team_dict = dict(zip(df_teams['team_id'],df_teams['parent_org_abbreviation']))
|
784 |
-
|
785 |
-
# Example: New player's metrics
|
786 |
-
# Example: New player's metrics
|
787 |
-
year = int(input.year_input())
|
788 |
-
sport_id = int(input.level_input())
|
789 |
-
pitcher_id = int(input.pitcher_id())
|
790 |
-
|
791 |
-
df_player = scrape.get_players(sport_id=sport_id,season=2024)
|
792 |
-
pitcher_name_id = dict(zip(df_player['player_id'],df_player['name']))
|
793 |
-
player_team_dict = dict(zip(df_player['player_id'],df_player['team']))
|
794 |
-
player_position_dict = dict(zip(df_player['player_id'],df_player['position']))
|
795 |
-
player_position_dict = dict(zip(df_player['player_id'],df_player['position']))
|
796 |
-
|
797 |
-
|
798 |
-
|
799 |
-
|
800 |
-
pitcher_summary = pl.read_csv(f'data/statcast/pitcher_summary_{level_dict_file[str(sport_id)]}_{year}.csv')
|
801 |
-
df_prospects = pd.read_csv(f'data/prospects/prospects_{year}.csv')
|
802 |
-
df_rosters = pd.read_csv(f'data/rosters/fangraphs_rosters_{year}.csv')
|
803 |
-
df_small = df_rosters[['minorbamid','minormasterid']].dropna()
|
804 |
-
dict_mlb_fg=dict(zip(df_small['minorbamid'].astype(int),df_small['minormasterid']))
|
805 |
-
|
806 |
-
df_stuff = pl.read_csv(f'data/stuff/stuff_{level_dict_file[str(sport_id)]}_{year}.csv')
|
807 |
-
# Filter out the "All" row
|
808 |
-
filtered_df = df_stuff.filter(pl.col("pitch_type") != "All")
|
809 |
-
|
810 |
-
filtered_all_df = df_stuff.filter(pl.col("pitch_type") == "All")
|
811 |
-
# Calculate total pitches for each pitcher and proportion of each pitch type
|
812 |
-
result_df = (
|
813 |
-
filtered_df
|
814 |
-
.with_columns([
|
815 |
-
# Total pitches for each pitcher
|
816 |
-
pl.col("pitches").sum().over("pitcher_id").alias("total_pitches"),
|
817 |
-
# Proportion of pitches
|
818 |
-
(pl.col("pitches") / pl.col("pitches").sum().over("pitcher_id")).alias("pitch_proportion"),
|
819 |
-
])
|
820 |
-
).filter(pl.col("pitch_proportion") > 0.05)
|
821 |
-
|
822 |
-
df_stuff = pl.concat([filtered_all_df.with_columns(
|
823 |
-
[pl.col("pitches").sum().over("pitcher_id").alias("total_pitches"),
|
824 |
-
(pl.col("pitches") / pl.col("pitches").sum().over("pitcher_id")).alias("pitch_proportion")]
|
825 |
-
), result_df])
|
826 |
-
|
827 |
-
|
828 |
-
|
829 |
-
|
830 |
-
df_stuff_filter = df_stuff.filter(pl.col('pitcher_id')==pitcher_id)
|
831 |
-
|
832 |
-
pitcher_summary_filter = pitcher_summary.filter((pl.col('pa') >= 300) & (pl.col('launch_speed') >= 0))
|
833 |
-
stat_list = pitcher_summary.columns[2:]
|
834 |
-
pitcher_summary_filter_pd = pitcher_summary_filter.to_pandas()
|
835 |
-
new_player_metrics = pitcher_summary.filter(pl.col('pitcher_id') == pitcher_id)[['pitcher_id'] + stat_list]
|
836 |
-
|
837 |
-
# Get percentiles for the new player
|
838 |
-
new_player_percentiles = calculate_new_player_percentiles(pitcher_id, new_player_metrics, pitcher_summary_filter)
|
839 |
-
|
840 |
-
p.set(0.6, "Creating plot...")
|
841 |
-
# Draw Baseball Savant-style percentile bars
|
842 |
-
draw_baseball_savant_percentiles(new_player_metrics=new_player_metrics,
|
843 |
-
new_player_percentiles=new_player_percentiles,
|
844 |
-
sport_id=sport_id,
|
845 |
-
year_input=year)
|
846 |
-
|
847 |
-
|
848 |
app = App(app_ui, server)
|
|
|
1 |
+
import polars as pl
|
2 |
+
import numpy as np
|
3 |
+
import joblib
|
4 |
+
from shiny import App, reactive, render, ui
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
import matplotlib.ticker as tkr
|
7 |
+
import seaborn as sns
|
8 |
+
import adjustText
|
9 |
+
sns.set_style('whitegrid')
|
10 |
+
|
11 |
+
|
12 |
+
import matplotlib
|
13 |
+
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#FFFFFF','#FFB000','#FE6100'])
|
14 |
+
|
15 |
+
xwoba_model = joblib.load('joblib_model/xwoba_model.joblib')
|
16 |
+
|
17 |
+
x = np.arange(-30,90.5,.5)
|
18 |
+
y = np.arange(0,120.5,0.1)
|
19 |
+
|
20 |
+
xx, yy = np.meshgrid(x, y)
|
21 |
+
|
22 |
+
df = pl.DataFrame({'launch_angle': xx.ravel(), 'launch_speed': yy.ravel()})
|
23 |
+
|
24 |
+
df = df.with_columns(
|
25 |
+
pl.Series('xwoba', xwoba_model.predict_proba(df) @ [0, 0.883, 1.244, 1.569, 2.004])
|
26 |
+
)
|
27 |
+
|
28 |
+
app_ui = ui.page_sidebar(
|
29 |
+
ui.sidebar(
|
30 |
+
ui.markdown("""
|
31 |
+
### How to use this app
|
32 |
+
|
33 |
+
1. Click anywhere on the plot to select a point, or manually enter coordinates
|
34 |
+
2. The selected point's coordinates will update automatically
|
35 |
+
3. The xwOBA value will be calculated based on these coordinates
|
36 |
+
"""),
|
37 |
+
ui.hr(),
|
38 |
+
ui.input_numeric("x_select", "Launch Speed (mph)", value=110),
|
39 |
+
ui.input_numeric("y_select", "Launch Angle (°)", value=30),
|
40 |
+
|
41 |
+
|
42 |
+
),
|
43 |
+
ui.output_plot("plot",width='900px',height='900px', click=True)
|
44 |
+
)
|
45 |
+
|
46 |
+
|
47 |
+
def server(input, output, session):
|
48 |
+
# Store the coordinates in reactive values
|
49 |
+
x_coord = reactive.value(110)
|
50 |
+
y_coord = reactive.value(30)
|
51 |
+
|
52 |
+
@reactive.effect
|
53 |
+
@reactive.event(input.plot_click)
|
54 |
+
def _():
|
55 |
+
# Update reactive values when plot is clicked
|
56 |
+
click_data = input.plot_click()
|
57 |
+
if click_data is not None:
|
58 |
+
x_coord.set(click_data["x"])
|
59 |
+
y_coord.set(click_data["y"])
|
60 |
+
# Update the numeric inputs
|
61 |
+
ui.update_numeric("x_select", value=round(click_data["x"],1))
|
62 |
+
ui.update_numeric("y_select", value=round(click_data["y"],1))
|
63 |
+
|
64 |
+
@reactive.effect
|
65 |
+
@reactive.event(input.x_select, input.y_select)
|
66 |
+
def _():
|
67 |
+
# Update reactive values when numeric inputs change
|
68 |
+
x_coord.set(round(input.x_select(),1))
|
69 |
+
y_coord.set(round(input.y_select(),1))
|
70 |
+
|
71 |
+
|
72 |
+
@render.plot
|
73 |
+
def plot():
|
74 |
+
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
|
75 |
+
|
76 |
+
h = ax.hexbin(df['launch_speed'],
|
77 |
+
df['launch_angle'],
|
78 |
+
C=df['xwoba'],
|
79 |
+
gridsize=(40,25),
|
80 |
+
cmap=cmap_sum,
|
81 |
+
vmin=0.0,
|
82 |
+
vmax=2.0,)
|
83 |
+
bounds=[0.0,0.4,0.8,1.2,1.6,2.0]
|
84 |
+
fig.colorbar(h, ax=ax, label='xwOBA',format=tkr.FormatStrFormatter('%.3f'),shrink=0.5,
|
85 |
+
ticks=bounds)
|
86 |
+
|
87 |
+
|
88 |
+
|
89 |
+
ax.set_xlabel('Launch Speed')
|
90 |
+
ax.set_ylabel('Launch Angle')
|
91 |
+
ax.set_title('Exit Velocity vs Launch Angle\nExpected Weighted On Base Average (xwOBA)\nBy: @TJStats, Data:MLB')
|
92 |
+
|
93 |
+
ax.grid(False)
|
94 |
+
ax.axis('square')
|
95 |
+
ax.set_xlim(0, 120)
|
96 |
+
ax.set_ylim(-30, 90)
|
97 |
+
|
98 |
+
x_select = input.x_select()
|
99 |
+
y_select = input.y_select()
|
100 |
+
|
101 |
+
|
102 |
+
sns.scatterplot(x=[x_select],y=[y_select],color='#648FFF',s=50,ax=ax,edgecolor='k',zorder=100)
|
103 |
+
|
104 |
+
xwoba_value = (xwoba_model.predict_proba([[y_select,x_select]]) @ [0, 0.883, 1.244, 1.569, 2.004])[0]
|
105 |
+
|
106 |
+
texts = [ax.text(x_select+3, y_select+3, f'xwOBA: {xwoba_value:.3f}', color='black', fontsize=12, weight='bold',
|
107 |
+
zorder=1000, bbox=dict(facecolor='white', alpha=0.5, edgecolor='black'))]
|
108 |
+
|
109 |
+
|
110 |
+
|
111 |
+
adjustText.adjust_text(texts,
|
112 |
+
|
113 |
+
arrowprops=dict(arrowstyle='->', color='#DC267F'),avoid_self=True,
|
114 |
+
min_arrow_len =5)
|
115 |
+
# xwoba_value =
|
116 |
+
|
117 |
+
ax.axhline(y=y_select, color='k', linestyle='--',linewidth=1,alpha=0.5)
|
118 |
+
ax.axvline(x=x_select, color='k', linestyle='--',linewidth=1,alpha=0.5)
|
119 |
+
|
120 |
+
# ax.axis('square')
|
121 |
+
|
122 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
app = App(app_ui, server)
|
joblib_model/xwoba_model.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05bade9c0420657d3f0dfe35f0b1adbd2d5ae25c87a07bdf6629987f29926438
|
3 |
+
size 10684246
|
requirements.txt
CHANGED
@@ -1,14 +1,8 @@
|
|
1 |
joblib==1.3.2
|
2 |
-
lightgbm
|
3 |
matplotlib==3.5.1
|
4 |
numpy==1.23.5
|
5 |
pandas==1.5.2
|
6 |
polars==1.12.0
|
7 |
-
Requests==2.31.0
|
8 |
-
scipy==1.11.1
|
9 |
seaborn==0.11.1
|
10 |
-
scikit-learn==1.0.1
|
11 |
shiny==0.6.1
|
12 |
-
|
13 |
-
tqdm==4.62.3
|
14 |
-
pyarrow
|
|
|
1 |
joblib==1.3.2
|
|
|
2 |
matplotlib==3.5.1
|
3 |
numpy==1.23.5
|
4 |
pandas==1.5.2
|
5 |
polars==1.12.0
|
|
|
|
|
6 |
seaborn==0.11.1
|
|
|
7 |
shiny==0.6.1
|
8 |
+
|
|
|
|