nesticot committed on
Commit
310c527
·
verified ·
1 Parent(s): 8539b4d

Delete functions

Browse files
functions/__pycache__/df_update.cpython-39.pyc DELETED
Binary file (14.1 kB)
 
functions/__pycache__/pitch_summary_functions.cpython-39.pyc DELETED
Binary file (33.8 kB)
 
functions/df_update.py DELETED
@@ -1,472 +0,0 @@
1
- import polars as pl
2
- import numpy as np
3
- import joblib
4
-
5
# Pre-trained models, deserialised once when this module is imported.
# The paths are relative, so they resolve against the working directory of the
# process that imports the module, not against this file's location.
# NOTE(review): joblib.load unpickles its input and can therefore run arbitrary
# code; only load model files from a trusted source.

# Not referenced anywhere in the visible part of this module.
loaded_model = joblib.load('joblib_model/barrel_model.joblib')
# Used by df_update.update to predict `in_zone` from (px, pz, sz_top, sz_bot).
in_zone_model = joblib.load('joblib_model/in_zone_model_knn_20240410.joblib')
# Used by df_update.update to predict `attack_zone` from the same four columns.
attack_zone_model = joblib.load('joblib_model/model_attack_zone.joblib')
# Used by df_update.update: predict_proba over (launch_angle, launch_speed).
xwoba_model = joblib.load('joblib_model/xwoba_model.joblib')
# Used by df_update.update to predict `px` from the `x` column.
px_model = joblib.load('joblib_model/linear_reg_model_x.joblib')
# Used by df_update.update to predict `pz` from the `y` column.
pz_model = joblib.load('joblib_model/linear_reg_model_z.joblib')
11
-
12
-
13
class df_update:
    """Derive per-pitch indicator columns and aggregate them into summaries.

    ``update`` adds flag/count columns to a pitch-level frame, and the two
    ``update_summary*`` methods group such a frame and compute totals and
    rate statistics from those columns.
    """

    def __init__(self):
        pass

    @staticmethod
    def _case_when(conditions, choices):
        """Build ``when(c0).then(v0).when(c1).then(v1)...otherwise(None)``.

        ``conditions`` and ``choices`` are parallel sequences; the first
        matching condition wins and rows matching none become null.
        """
        chain = pl.when(conditions[0]).then(choices[0])
        for condition, choice in zip(conditions[1:], choices[1:]):
            chain = chain.when(condition).then(choice)
        return chain.otherwise(None)

    def update(self, df_clone: pl.DataFrame):
        """Return a copy of ``df_clone`` with derived per-pitch columns added.

        Columns read from the input: ``type_ab``, ``is_pitch``, ``sz_top``,
        ``sz_bot``, ``zone``, ``x``, ``y``, ``px``, ``pz``, ``event_type``,
        ``play_description``, ``launch_speed``, ``launch_angle``,
        ``play_code``, ``is_swing`` and ``trajectory``.

        Relies on the module-level models ``px_model``, ``pz_model``,
        ``in_zone_model``, ``attack_zone_model`` and ``xwoba_model``.

        Parameters:
        df_clone (pl.DataFrame): Pitch-level data; it is cloned, not mutated.

        Returns:
        pl.DataFrame: The input columns plus the derived columns, including
        one-hot ``trajectory_*`` columns.
        """
        df = df_clone.clone()

        hit_codes = ['single',
                     'double', 'home_run', 'triple']

        ab_codes = ['single', 'strikeout', 'field_out',
                    'grounded_into_double_play', 'fielders_choice', 'force_out',
                    'double', 'field_error', 'home_run', 'triple',
                    'double_play',
                    'fielders_choice_out', 'strikeout_double_play',
                    'other_out', 'triple_play']

        obp_true_codes = ['single', 'walk',
                          'double', 'home_run', 'triple',
                          'hit_by_pitch', 'intent_walk']

        obp_codes = ['single', 'strikeout', 'walk', 'field_out',
                     'grounded_into_double_play', 'fielders_choice', 'force_out',
                     'double', 'sac_fly', 'field_error', 'home_run', 'triple',
                     'hit_by_pitch', 'double_play', 'intent_walk',
                     'fielders_choice_out', 'strikeout_double_play',
                     'sac_fly_double_play',
                     'other_out', 'triple_play']

        bip_codes = ['In play, no out', 'In play, run(s)', 'In play, out(s)']

        woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                      'double', 'sac_fly', 'force_out', 'home_run',
                      'grounded_into_double_play', 'fielders_choice',
                      'field_error', 'triple', 'sac_bunt', 'double_play',
                      'fielders_choice_out', 'strikeout_double_play',
                      'sac_fly_double_play', 'other_out']

        # These input columns are never reassigned below, so the Series can be
        # fetched once and reused across the with_columns calls.
        event_type = df['event_type']
        launch_speed = df['launch_speed']
        launch_angle = df['launch_angle']
        play_code = df['play_code']

        conditions_barrel = [
            launch_speed.is_null(),
            (launch_speed * 1.5 - launch_angle >= 117) &
            (launch_speed + launch_angle >= 124) &
            (launch_speed >= 98) &
            (launch_angle >= 4) & (launch_angle <= 50)
        ]
        choices_barrel = [False, True]

        conditions_tb = [
            (event_type == 'single'),
            (event_type == 'double'),
            (event_type == 'triple'),
            (event_type == 'home_run')
        ]
        choices_tb = [1, 2, 3, 4]

        conditions_woba = [
            event_type.is_in(['strikeout', 'field_out', 'sac_fly', 'force_out',
                              'grounded_into_double_play', 'fielders_choice',
                              'field_error', 'sac_bunt', 'double_play',
                              'fielders_choice_out', 'strikeout_double_play',
                              'sac_fly_double_play', 'other_out']),
            event_type == 'walk',
            event_type == 'hit_by_pitch',
            event_type == 'single',
            event_type == 'double',
            event_type == 'triple',
            event_type == 'home_run'
        ]
        choices_woba = [0, 0.689, 0.720, 0.881, 1.254, 1.589, 2.048]

        is_whiff = (play_code == 'S') | (play_code == 'W') | (play_code == 'T')

        # Both zone models take the same feature matrix, built from the
        # incoming columns (before the sz_top/sz_bot zero-to-null replacement).
        zone_features = df[['px', 'pz', 'sz_top', 'sz_bot']].fill_null(0).to_numpy()

        df = df.with_columns([
            pl.when(df['type_ab'].is_not_null()).then(1).otherwise(0).alias('pa'),
            pl.when(df['is_pitch']).then(1).otherwise(0).alias('pitches'),
            pl.when(df['sz_top'] == 0).then(None).otherwise(df['sz_top']).alias('sz_top'),
            pl.when(df['sz_bot'] == 0).then(None).otherwise(df['sz_bot']).alias('sz_bot'),
            pl.when(df['zone'] > 0).then(df['zone'] < 10).otherwise(None).alias('in_zone'),
            pl.Series(px_model.predict(df[['x']].fill_null(0).to_numpy())[:, 0]).alias('px_predict'),
            pl.Series(pz_model.predict(df[['y']].fill_null(0).to_numpy())[:, 0] + 3.2).alias('pz_predict'),
            pl.Series(in_zone_model.predict(zone_features)).alias('in_zone_predict'),
            pl.Series(attack_zone_model.predict(zone_features)).alias('attack_zone_predict'),
            pl.when(event_type.is_in(hit_codes)).then(True).otherwise(False).alias('hits'),
            pl.when(event_type.is_in(ab_codes)).then(True).otherwise(False).alias('ab'),
            pl.when(event_type.is_in(obp_true_codes)).then(True).otherwise(False).alias('on_base'),
            pl.when(event_type.is_in(obp_codes)).then(True).otherwise(False).alias('obp'),
            pl.when(df['play_description'].is_in(bip_codes)).then(True).otherwise(False).alias('bip'),
            self_case_when(conditions_barrel, choices_barrel).alias('barrel')
            if False else df_update._case_when(conditions_barrel, choices_barrel).alias('barrel'),
            pl.when(launch_angle.is_null()).then(False)
            .when((launch_angle >= 8) & (launch_angle <= 32)).then(True)
            .otherwise(None).alias('sweet_spot'),
            pl.when(launch_speed.is_null()).then(False)
            .when(launch_speed >= 94.5).then(True)
            .otherwise(None).alias('hard_hit'),
            df_update._case_when(conditions_tb, choices_tb).alias('tb'),
            df_update._case_when(conditions_woba, choices_woba).alias('woba'),
            pl.when(is_whiff).then(1).otherwise(0).alias('whiffs'),
            pl.when(is_whiff | (play_code == 'C')).then(1).otherwise(0).alias('csw'),
            pl.when(pl.col('is_swing').cast(pl.Boolean)).then(1).otherwise(0).alias('swings'),
            # 'k' and 'bb' are recomputed in the next with_columns call; they
            # are created here only so their column position stays unchanged.
            pl.col('event_type').is_in(['strikeout', 'strikeout_double_play']).alias('k'),
            pl.col('event_type').is_in(['walk', 'intent_walk']).alias('bb'),
            # Placeholders that are filled in further down.
            pl.lit(None).alias('attack_zone'),
            pl.lit(None).alias('woba_pred'),
            pl.lit(None).alias('woba_pred_contact')
        ])

        # NOTE(review): every expression in a single with_columns call sees the
        # frame as it was before the call, so out_zone / zone_* / ozone_* below
        # are computed from the in_zone value *before* it is backfilled from
        # in_zone_predict in this same call. Confirm that this is intended.
        df = df.with_columns([
            pl.when(event_type.is_in(woba_codes)).then(1).otherwise(None).alias('woba_codes'),
            pl.when(event_type.is_in(woba_codes)).then(1).otherwise(None).alias('xwoba_codes'),
            pl.when((pl.col('tb') >= 0)).then(df['woba']).otherwise(None).alias('woba_contact'),
            pl.when(pl.col('px').is_null()).then(pl.col('px_predict')).otherwise(pl.col('px')).alias('px'),
            pl.when(pl.col('pz').is_null()).then(pl.col('pz_predict')).otherwise(pl.col('pz')).alias('pz'),
            pl.when(pl.col('in_zone').is_null()).then(pl.col('in_zone_predict')).otherwise(pl.col('in_zone')).alias('in_zone'),
            pl.when(launch_speed.is_null()).then(None).otherwise(df['barrel']).alias('barrel'),
            pl.lit('average').alias('average'),
            pl.when(pl.col('in_zone') == False).then(True).otherwise(False).alias('out_zone'),
            pl.when((pl.col('in_zone') == True) & (pl.col('swings') == 1)).then(True).otherwise(False).alias('zone_swing'),
            pl.when((pl.col('in_zone') == True) & (pl.col('swings') == 1) & (pl.col('whiffs') == 0)).then(True).otherwise(False).alias('zone_contact'),
            pl.when((pl.col('in_zone') == False) & (pl.col('swings') == 1)).then(True).otherwise(False).alias('ozone_swing'),
            pl.when((pl.col('in_zone') == False) & (pl.col('swings') == 1) & (pl.col('whiffs') == 0)).then(True).otherwise(False).alias('ozone_contact'),
            pl.when(pl.col('event_type').str.contains('strikeout')).then(True).otherwise(False).alias('k'),
            pl.when(pl.col('event_type').is_in(['walk', 'intent_walk'])).then(True).otherwise(False).alias('bb'),
            pl.when(pl.col('attack_zone').is_null()).then(pl.col('attack_zone_predict')).otherwise(pl.col('attack_zone')).alias('attack_zone'),
        ])

        # attack_zone codes 0..3 are exposed as heart / shadow / chase / waste.
        zone_names = ['heart', 'shadow', 'chase', 'waste']
        attack_zone = df['attack_zone']
        df = df.with_columns(
            [
                (df['k'].cast(pl.Float32) - df['bb'].cast(pl.Float32)).alias('k_minus_bb'),
                (df['bb'].cast(pl.Float32) - df['k'].cast(pl.Float32)).alias('bb_minus_k'),
                (launch_speed > 0).alias('bip_div'),
            ]
            + [(attack_zone == code).alias(name)
               for code, name in enumerate(zone_names)]
            + [((attack_zone == code) & (df['swings'] == 1)).alias(f'{name}_swing')
               for code, name in enumerate(zone_names)]
            + [((attack_zone == code) & (df['whiffs'] == 1)).alias(f'{name}_whiff')
               for code, name in enumerate(zone_names)]
        )

        # Expected value per batted ball: class probabilities from xwoba_model
        # weighted by the hard-coded per-class values below.
        # NOTE(review): assumes predict_proba returns exactly five classes in
        # the order matching these weights — confirm against the trained model.
        batted_ball = df[['launch_angle', 'launch_speed']].fill_null(0).to_numpy()
        weighted = xwoba_model.predict_proba(batted_ball) * ([0, 0.881, 1.254, 1.589, 2.048])
        df = df.with_columns([
            pl.Series([sum(row) for row in weighted]).alias('woba_pred_predict')
        ])

        # Events with no batted ball get fixed values instead of a prediction.
        df = df.with_columns([
            pl.when(pl.col('event_type').is_in(['walk'])).then(0.689)
            .when(pl.col('event_type').is_in(['hit_by_pitch'])).then(0.720)
            .when(pl.col('event_type').is_in(['strikeout', 'strikeout_double_play'])).then(0)
            .otherwise(pl.col('woba_pred_predict')).alias('woba_pred_predict')
        ])

        df = df.with_columns([
            pl.when(pl.col('woba_codes').is_null()).then(None).otherwise(pl.col('woba_pred_predict')).alias('woba_pred'),
            pl.when(pl.col('bip') != 1).then(None).otherwise(pl.col('woba_pred_predict')).alias('woba_pred_contact'),
        ])

        # Fold bunt trajectories into their non-bunt equivalents and turn the
        # empty string into null.
        df = df.with_columns([
            pl.when(pl.col('trajectory').is_in(['bunt_popup'])).then(pl.lit('popup'))
            .when(pl.col('trajectory').is_in(['bunt_grounder'])).then(pl.lit('ground_ball'))
            .when(pl.col('trajectory').is_in(['bunt_line_drive'])).then(pl.lit('line_drive'))
            .when(pl.col('trajectory').is_in([''])).then(pl.lit(None))
            .otherwise(pl.col('trajectory')).alias('trajectory')
        ])

        # One-hot encode the trajectory column.
        dummy_df = df.select(pl.col('trajectory')).to_dummies()

        # A trajectory value absent from this frame produces no dummy column,
        # so add an all-zero column for it to keep the output schema stable.
        for col in ['trajectory_fly_ball', 'trajectory_ground_ball',
                    'trajectory_line_drive', 'trajectory_popup']:
            if col not in dummy_df.columns:
                dummy_df = dummy_df.with_columns(pl.lit(0).alias(col))

        df = df.hstack(dummy_df)

        # to_dummies emits a column for null trajectories; it is not wanted.
        if 'trajectory_null' in df.columns:
            df = df.drop('trajectory_null')

        return df

    @staticmethod
    def _summarise(df, group_keys):
        """Group ``df`` by ``group_keys`` and compute totals and rate columns.

        Shared implementation behind ``update_summary`` and
        ``update_summary_select``; expects the columns produced by ``update``.
        """
        zone_names = ['heart', 'shadow', 'chase', 'waste']

        def total(source, alias=None):
            # Sum of `source`, named `alias` (defaults to the source name).
            return pl.col(source).sum().alias(alias or source)

        def ratio(alias, numerator, denominator):
            # Quotient of two already-aggregated columns.
            return (pl.col(numerator) / pl.col(denominator)).alias(alias)

        aggregations = (
            [
                total('pa'),
                total('ab'),
                total('obp', 'obp_pa'),
                total('hits'),
                total('on_base'),
                total('k'),
                total('bb'),
                total('bb_minus_k'),
                total('csw'),
                total('bip'),
                total('bip_div'),
                total('tb'),
                total('woba'),
                total('woba_contact'),
                total('woba_pred', 'xwoba'),
                total('woba_pred_contact', 'xwoba_contact'),
                total('woba_codes'),
                total('xwoba_codes'),
                total('hard_hit'),
                total('barrel'),
                total('sweet_spot'),
                pl.col('launch_speed').max().alias('max_launch_speed'),
                pl.col('launch_speed').quantile(0.90).alias('launch_speed_90'),
                pl.col('launch_speed').mean().alias('launch_speed'),
                pl.col('launch_angle').mean().alias('launch_angle'),
                total('is_pitch', 'pitches'),
                total('swings'),
                total('in_zone'),
                total('out_zone'),
                total('whiffs'),
                total('zone_swing'),
                total('zone_contact'),
                total('ozone_swing'),
                total('ozone_contact'),
                total('trajectory_ground_ball', 'ground_ball'),
                total('trajectory_line_drive', 'line_drive'),
                total('trajectory_fly_ball', 'fly_ball'),
                total('trajectory_popup', 'pop_up'),
                # Number of non-null attack_zone values: the denominator for
                # the per-zone shares below.
                pl.col('attack_zone').count().alias('attack_zone'),
            ]
            + [total(name) for name in zone_names]
            + [total(f'{name}_swing') for name in zone_names]
            + [total(f'{name}_whiff') for name in zone_names]
        )

        rates = (
            [
                ratio('avg', 'hits', 'ab'),
                ratio('obp', 'on_base', 'obp_pa'),
                ratio('slg', 'tb', 'ab'),
                (pl.col('on_base') / pl.col('obp_pa') + pl.col('tb') / pl.col('ab')).alias('ops'),
                ratio('k_percent', 'k', 'pa'),
                ratio('bb_percent', 'bb', 'pa'),
                ratio('bb_minus_k_percent', 'bb_minus_k', 'pa'),
                ratio('bb_over_k_percent', 'bb', 'k'),
                ratio('csw_percent', 'csw', 'pitches'),
                ratio('sweet_spot_percent', 'sweet_spot', 'bip_div'),
                ratio('woba_percent', 'woba', 'woba_codes'),
                ratio('woba_percent_contact', 'woba_contact', 'bip'),
                ratio('hard_hit_percent', 'hard_hit', 'bip_div'),
                ratio('barrel_percent', 'barrel', 'bip_div'),
                ratio('zone_contact_percent', 'zone_contact', 'zone_swing'),
                ratio('zone_swing_percent', 'zone_swing', 'in_zone'),
                ratio('zone_percent', 'in_zone', 'pitches'),
                (pl.col('ozone_swing') / (pl.col('pitches') - pl.col('in_zone'))).alias('chase_percent'),
                ratio('chase_contact', 'ozone_contact', 'ozone_swing'),
                ratio('swing_percent', 'swings', 'pitches'),
                ratio('whiff_rate', 'whiffs', 'swings'),
                ratio('swstr_rate', 'whiffs', 'pitches'),
                ratio('ground_ball_percent', 'ground_ball', 'bip'),
                ratio('line_drive_percent', 'line_drive', 'bip'),
                ratio('fly_ball_percent', 'fly_ball', 'bip'),
                ratio('pop_up_percent', 'pop_up', 'bip'),
            ]
            + [ratio(f'{name}_zone_percent', name, 'attack_zone')
               for name in zone_names]
            + [ratio(f'{name}_zone_swing_percent', f'{name}_swing', name)
               for name in zone_names]
            + [ratio(f'{name}_zone_whiff_percent', f'{name}_whiff', f'{name}_swing')
               for name in zone_names]
            + [
                ratio('xwoba_percent', 'xwoba', 'xwoba_codes'),
                ratio('xwoba_percent_contact', 'xwoba_contact', 'bip'),
            ]
        )

        return df.group_by(group_keys).agg(aggregations).with_columns(rates)

    def update_summary(self, df: pl.DataFrame, pitcher: bool = True) -> pl.DataFrame:
        """
        Update summary statistics for pitchers or batters.

        Parameters:
        df (pl.DataFrame): The input Polars DataFrame containing player statistics.
        pitcher (bool): A flag indicating whether to calculate statistics for pitchers (True) or batters (False).

        Returns:
        pl.DataFrame: A Polars DataFrame with aggregated and calculated summary statistics.
        """
        # The flag only selects which id/name column pair to group by.
        position = 'pitcher' if pitcher else 'batter'
        return df_update._summarise(df, [f'{position}_id', f'{position}_name'])

    def update_summary_select(self, df: pl.DataFrame, selection: list) -> pl.DataFrame:
        """
        Update summary statistics grouped by an arbitrary set of columns.

        Parameters:
        df (pl.DataFrame): The input Polars DataFrame containing player statistics.
        selection (list): The column names to group by.

        Returns:
        pl.DataFrame: A Polars DataFrame with aggregated and calculated summary statistics.
        """
        return df_update._summarise(df, selection)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
functions/pitch_summary_functions.py DELETED
@@ -1,1140 +0,0 @@
1
- import pandas as pd
2
- import numpy as np
3
- import json
4
- from matplotlib.ticker import FuncFormatter
5
- from matplotlib.ticker import MaxNLocator
6
- import math
7
- from matplotlib.patches import Ellipse
8
- import matplotlib.transforms as transforms
9
- import matplotlib.colors
10
- import matplotlib.colors as mcolors
11
- import seaborn as sns
12
- import matplotlib.pyplot as plt
13
- import requests
14
- import polars as pl
15
- from PIL import Image
16
- import requests
17
- from io import BytesIO
18
- from matplotlib.offsetbox import OffsetImage, AnnotationBbox
19
- import matplotlib.pyplot as plt
20
- import matplotlib.gridspec as gridspec
21
- import PIL
22
-
23
-
24
- ### PITCH COLOURS ###
25
-
26
- # Dictionary to map pitch types to their corresponding colors and names
27
pitch_colours = {
    ## Fastballs ##
    'FF': {'colour': '#FF007D', 'name': '4-Seam Fastball'},
    'FA': {'colour': '#FF007D', 'name': 'Fastball'},
    'SI': {'colour': '#98165D', 'name': 'Sinker'},
    'FC': {'colour': '#BE5FA0', 'name': 'Cutter'},

    ## Offspeed ##
    'CH': {'colour': '#F79E70', 'name': 'Changeup'},
    'FS': {'colour': '#FE6100', 'name': 'Splitter'},
    'SC': {'colour': '#F08223', 'name': 'Screwball'},
    'FO': {'colour': '#FFB000', 'name': 'Forkball'},

    ## Sliders ##
    'SL': {'colour': '#67E18D', 'name': 'Slider'},
    'ST': {'colour': '#1BB999', 'name': 'Sweeper'},
    'SV': {'colour': '#376748', 'name': 'Slurve'},

    ## Curveballs ##
    'KC': {'colour': '#311D8B', 'name': 'Knuckle Curve'},
    'CU': {'colour': '#3025CE', 'name': 'Curveball'},
    'CS': {'colour': '#274BFC', 'name': 'Slow Curve'},
    'EP': {'colour': '#648FFF', 'name': 'Eephus'},

    ## Others ##
    # 'KN' used to be listed twice ('Knuckleball' then 'Knuckle Ball'). A dict
    # literal keeps only the last duplicate, so 'Knuckle Ball' was the effective
    # name; it stays canonical and 'Knuckleball' is registered as an alias below.
    'KN': {'colour': '#867A08', 'name': 'Knuckle Ball'},
    'PO': {'colour': '#472C30', 'name': 'Pitch Out'},
    'UN': {'colour': '#9C8975', 'name': 'Unknown'},
}

# Lookup tables derived from pitch_colours:
#   dict_colour          pitch code -> hex colour
#   dict_pitch           pitch code -> display name
#   dict_pitch_desc_type display name -> pitch code (plus aliases and 'All')
#   dict_pitch_name      display name -> hex colour (plus aliases)
dict_colour = {key: value['colour'] for key, value in pitch_colours.items()}
dict_pitch = {key: value['name'] for key, value in pitch_colours.items()}
dict_pitch_desc_type = {value['name']: key for key, value in pitch_colours.items()}
dict_pitch_desc_type.update({'Four-Seam Fastball': 'FF'})
dict_pitch_desc_type.update({'All': 'All'})
dict_pitch_desc_type.update({'Knuckleball': 'KN'})
dict_pitch_name = {value['name']: value['colour'] for key, value in pitch_colours.items()}
dict_pitch_name.update({'Four-Seam Fastball': '#FF007D'})
dict_pitch_name.update({'Knuckleball': pitch_colours['KN']['colour']})
66
-
67
# Shared matplotlib font dictionaries: body text, plot titles, axis labels.
font_properties = dict(family='calibi', size=12)
font_properties_titles = dict(family='calibi', size=20)
font_properties_axes = dict(family='calibi', size=16)
70
-
71
# Three-stop colormap running blue -> white -> gold.
cmap_sum = mcolors.LinearSegmentedColormap.from_list(
    "",
    ['#648FFF', '#FFFFFF', '#FFB000'],
)
72
-
73
- ### FANGRAPHS STATS DICT ###
74
# Maps a stat key to its mathtext table header and its format spec.
# Backslashes are doubled throughout so the literals contain no invalid escape
# sequences ('\/' and '\%'); the resulting runtime strings are unchanged.
fangraphs_stats_dict = {
    'IP': {'table_header': '$\\bf{IP}$', 'format': '.1f'},
    'TBF': {'table_header': '$\\bf{PA}$', 'format': '.0f'},
    'AVG': {'table_header': '$\\bf{AVG}$', 'format': '.3f'},
    'K/9': {'table_header': '$\\bf{K\\/9}$', 'format': '.2f'},
    'BB/9': {'table_header': '$\\bf{BB\\/9}$', 'format': '.2f'},
    'K/BB': {'table_header': '$\\bf{K\\/BB}$', 'format': '.2f'},
    'HR/9': {'table_header': '$\\bf{HR\\/9}$', 'format': '.2f'},
    'K%': {'table_header': '$\\bf{K\\%}$', 'format': '.1%'},
    'BB%': {'table_header': '$\\bf{BB\\%}$', 'format': '.1%'},
    'K-BB%': {'table_header': '$\\bf{K-BB\\%}$', 'format': '.1%'},
    'WHIP': {'table_header': '$\\bf{WHIP}$', 'format': '.2f'},
    'BABIP': {'table_header': '$\\bf{BABIP}$', 'format': '.3f'},
    'LOB%': {'table_header': '$\\bf{LOB\\%}$', 'format': '.1%'},
    'xFIP': {'table_header': '$\\bf{xFIP}$', 'format': '.2f'},
    'FIP': {'table_header': '$\\bf{FIP}$', 'format': '.2f'},
    'H': {'table_header': '$\\bf{H}$', 'format': '.0f'},
    '2B': {'table_header': '$\\bf{2B}$', 'format': '.0f'},
    '3B': {'table_header': '$\\bf{3B}$', 'format': '.0f'},
    'R': {'table_header': '$\\bf{R}$', 'format': '.0f'},
    'ER': {'table_header': '$\\bf{ER}$', 'format': '.0f'},
    'HR': {'table_header': '$\\bf{HR}$', 'format': '.0f'},
    'BB': {'table_header': '$\\bf{BB}$', 'format': '.0f'},
    'IBB': {'table_header': '$\\bf{IBB}$', 'format': '.0f'},
    'HBP': {'table_header': '$\\bf{HBP}$', 'format': '.0f'},
    'SO': {'table_header': '$\\bf{SO}$', 'format': '.0f'},
    # OBP and SLG are rate stats below/around 1.0; '.0f' rounded them to a
    # bare integer, so they now use three decimals like AVG and wOBA.
    'OBP': {'table_header': '$\\bf{OBP}$', 'format': '.3f'},
    'SLG': {'table_header': '$\\bf{SLG}$', 'format': '.3f'},
    'ERA': {'table_header': '$\\bf{ERA}$', 'format': '.2f'},
    'wOBA': {'table_header': '$\\bf{wOBA}$', 'format': '.3f'},
    'G': {'table_header': '$\\bf{G}$', 'format': '.0f'},
    'strikePercentage': {'table_header': '$\\bf{Strike\\%}$', 'format': '.1%'},
}
105
-
106
# General-purpose hex colour palette, addressed by index elsewhere in the module.
colour_palette = [
    '#FFB000',
    '#648FFF',
    '#785EF0',
    '#DC267F',
    '#FE6100',
    '#3D1EB2',
    '#894D80',
    '#16AA02',
    '#B5592B',
    '#A3C1ED',
]
108
-
109
- ### GET COLOURS ###
110
def get_color(value, normalize, cmap_sum):
    """
    Map a value to a hex colour string.

    Parameters
    ----------
    value : float
        The value to colour.
    normalize : callable
        Applied to ``value`` first; its result is what the colormap receives
        (e.g. a ``matplotlib.colors.Normalize`` instance).
    cmap_sum : callable
        Colormap called with the normalised value.

    Returns
    -------
    str
        Hexadecimal colour code produced by ``mcolors.to_hex``.
    """
    scaled = normalize(value)
    rgba = cmap_sum(scaled)
    return mcolors.to_hex(rgba)
130
-
131
- ### PITCH ELLIPSE ###
132
def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
    """
    Draw the covariance confidence ellipse of *x* and *y* on *ax*.

    Parameters
    ----------
    x, y : array-like, shape (n, )
        Input data. Both must also provide a ``.mean()`` method.

    ax : matplotlib.axes.Axes
        The axes object to draw the ellipse into.

    n_std : float
        The number of standard deviations to determine the ellipse's radiuses.

    facecolor : color
        Fill colour of the ellipse.

    **kwargs
        Forwarded to `~matplotlib.patches.Ellipse`. ``linewidth`` and
        ``linestyle`` default to ``2`` and ``'--'`` but may be overridden.

    Returns
    -------
    matplotlib.patches.Ellipse or None
        The patch added to *ax*, or ``None`` when no ellipse could be built
        (a ``ValueError`` during construction, or a non-finite correlation).

    Raises
    ------
    ValueError
        If *x* and *y* differ in length.
    """
    if len(x) != len(y):
        raise ValueError("x and y must be the same size")

    # Apply the dashed-outline defaults via setdefault so that a caller passing
    # linewidth/linestyle gets an override instead of a duplicate-keyword
    # TypeError.
    kwargs.setdefault('linewidth', 2)
    kwargs.setdefault('linestyle', '--')

    try:
        cov = np.cov(x, y)
        pearson = cov[0, 1] / np.sqrt(cov[0, 0] * cov[1, 1])

        # Zero variance in either coordinate gives a NaN/inf correlation; an
        # ellipse built from it would have non-finite radii, so draw nothing.
        if not np.isfinite(pearson):
            return None

        # Using a special case to obtain the eigenvalues of this
        # two-dimensional dataset.
        ell_radius_x = np.sqrt(1 + pearson)
        ell_radius_y = np.sqrt(1 - pearson)
        ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2,
                          facecolor=facecolor, **kwargs)

        # Standard deviation of each coordinate (square root of its variance)
        # scaled by the requested number of standard deviations.
        scale_x = np.sqrt(cov[0, 0]) * n_std
        mean_x = x.mean()

        scale_y = np.sqrt(cov[1, 1]) * n_std
        mean_y = y.mean()

        # Unit ellipse -> rotate 45 degrees -> stretch -> move to the data mean.
        transf = transforms.Affine2D() \
            .rotate_deg(45) \
            .scale(scale_x, scale_y) \
            .translate(mean_x, mean_y)

        ellipse.set_transform(transf + ax.transData)
    except ValueError:
        return None

    return ax.add_patch(ellipse)
192
- ### VELOCITY KDES ###
193
def velocity_kdes(df: pl.DataFrame, ax: plt.Axes, gs: gridspec.GridSpec, gs_x: list, gs_y: list, fig: plt.Figure):
    """
    Plot the velocity KDEs for different pitch types.

    One stacked sub-axis is created per pitch type inside the region of ``gs``
    selected by ``gs_x``/``gs_y``. Each sub-axis shows the pitcher's
    ``start_speed`` distribution, a dashed line at the pitcher's mean and a
    dotted line at the mean ``release_speed`` read from
    ``functions/statcast_2024_grouped.csv`` for the same pitch type.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. Reads the ``pitch_count``,
        ``pitch_type`` and ``start_speed`` columns.
    ax : plt.Axes
        The outer axis; it is hidden and only carries the title.
    gs : GridSpec
        The GridSpec for the subplot layout.
    gs_x : list
        Row bounds; ``gs_x[0]:gs_x[-1]`` is the row slice used.
    gs_y : list
        Column bounds; ``gs_y[0]:gs_y[-1]`` is the column slice used.
    fig : plt.Figure
        The figure to plot on.
    """
    # Get unique pitch types sorted by pitch count
    items_in_order = df.sort("pitch_count", descending=True)['pitch_type'].unique(maintain_order=True).to_numpy()

    # Create the inner subplot inside the outer subplot
    ax.axis('off')
    ax.set_title('Pitch Velocity Distribution', fontdict={'family': 'calibi', 'size': 20})
    inner_grid_1 = gridspec.GridSpecFromSubplotSpec(len(items_in_order), 1, subplot_spec=gs[gs_x[0]:gs_x[-1], gs_y[0]:gs_y[-1]])
    ax_top = [fig.add_subplot(inner) for inner in inner_grid_1]

    # Loop-invariant work hoisted out of the per-pitch loop: the reference CSV
    # was previously re-read for every pitch type, and the shared x bounds were
    # recomputed several times per iteration.
    df_statcast_group = pl.read_csv('functions/statcast_2024_grouped.csv')
    x_lower = math.floor(df['start_speed'].min() / 5) * 5
    x_upper = math.ceil(df['start_speed'].max() / 5) * 5
    x_ticks = range(x_lower, x_upper, 5)
    last_idx = len(items_in_order) - 1

    for idx, i in enumerate(items_in_order):
        panel = ax_top[idx]
        colour = dict_colour[i]
        pitch_data = df.filter(pl.col('pitch_type') == i)['start_speed']

        unique_speeds = np.unique(pitch_data)
        if unique_speeds.size == 1:  # All values identical: draw a spike, not a KDE
            panel.plot([unique_speeds, unique_speeds], [0, 1], linewidth=4, color=colour, zorder=20)
        else:
            sns.kdeplot(pitch_data, ax=panel, fill=True, clip=(pitch_data.min(), pitch_data.max()), color=colour)

        # Mean speed of this pitcher's pitches; the y-limits are read at draw
        # time, exactly as before, because each plotted line can change them.
        pitcher_mean = pitch_data.mean()
        panel.plot([pitcher_mean, pitcher_mean], [panel.get_ylim()[0], panel.get_ylim()[1]], color=colour, linestyle='--')

        # Mean release speed for the same pitch type in the statcast group data
        league_mean = df_statcast_group.filter(df_statcast_group['pitch_type'] == i)['release_speed'].mean()
        panel.plot([league_mean, league_mean], [panel.get_ylim()[0], panel.get_ylim()[1]], color=colour, linestyle=':')

        panel.set_xlim(x_lower, x_upper)
        panel.set_xlabel('')
        panel.set_ylabel('')
        if idx < last_idx:
            # Every panel except the bottom one hides its frame and x tick colours.
            panel.spines['top'].set_visible(False)
            panel.spines['right'].set_visible(False)
            panel.spines['left'].set_visible(False)
            panel.tick_params(axis='x', colors='none')

        panel.set_xticks(x_ticks)
        panel.set_yticks([])
        panel.grid(axis='x', linestyle='--')
        panel.text(-0.01, 0.5, i, transform=panel.transAxes, fontsize=14, va='center', ha='right')

    # The bottom panel keeps its x axis and carries the shared label.
    ax_top[-1].spines['top'].set_visible(False)
    ax_top[-1].spines['right'].set_visible(False)
    ax_top[-1].spines['left'].set_visible(False)
    ax_top[-1].set_xticks(list(x_ticks))
    ax_top[-1].set_xlabel('Velocity (mph)')
256
-
257
- ### TJ STUFF+ ROLLING ###
258
def tj_stuff_roling(df: pl.DataFrame, window: int, ax: plt.Axes):
    """
    Draw a rolling-mean tjStuff+ line per pitch type.

    Parameters
    ----------
    df : pl.DataFrame
        Pitch data; reads ``pitch_count``, ``pitch_type`` and ``tj_stuff_plus``.
    window : int
        Rolling window size. Pitch types whose largest ``pitch_count`` is
        below this value are not drawn.
    ax : plt.Axes
        The axis to plot on.
    """
    # Pitch types in the order they appear once rows are sorted by pitch count
    ordered_types = (
        df.sort("pitch_count", descending=True)['pitch_type']
        .unique(maintain_order=True)
        .to_numpy()
    )

    for pitch_type in ordered_types:
        per_type = df.filter(pl.col('pitch_type') == pitch_type)
        highest_count = per_type['pitch_count'].max()
        if highest_count < window:
            continue
        rolling_stuff = per_type['tj_stuff_plus'].rolling_mean(window)
        sns.lineplot(
            x=range(1, highest_count + 1),
            y=rolling_stuff,
            color=dict_colour[pitch_type],
            ax=ax,
            linewidth=3
        )

    # The x axis starts at the window size and ends at the largest pitch count
    ax.set_xlim(window, df['pitch_count'].max())
    ax.set_ylim(70, 130)

    # Labels, title and integer-only x ticks
    ax.set_xlabel('Pitches', fontdict=font_properties_axes)
    ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
    ax.set_title(f"{window} Pitch Rolling tjStuff+", fontdict=font_properties_titles)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
293
-
294
- ### TJ STUFF+ ROLLING ###
295
def tj_stuff_roling_game(df: pl.DataFrame, window: int, ax: plt.Axes):
    """
    Plot the rolling average of tjStuff+ for different pitch types over games.

    Each ``game_id`` is numbered in order of first appearance. For every pitch
    type the per-game mean ``tj_stuff_plus`` is joined against the full list
    of game numbers; games without that pitch are forward/backward filled and
    marked with a hollow point on top of the line.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. Reads ``game_id``, ``game_date``,
        ``pitch_type``, ``pitch_description`` and ``tj_stuff_plus``.
    window : int
        The window size (in games) for calculating the rolling average.
    ax : plt.Axes
        The axis to plot on.
    """
    # Map game_id to sequential numbers
    date_to_number = {date: i + 1 for i, date in enumerate(df['game_id'].unique(maintain_order=True))}

    # Add a column with the sequential game numbers
    df_plot = df.with_columns(
        pl.col("game_id").map_elements(lambda x: date_to_number.get(x, x)).alias("start_number")
    )

    # Group by relevant columns and calculate mean tj_stuff_plus
    plot_game_roll = df_plot.group_by(['start_number', 'game_id', 'game_date', 'pitch_type', 'pitch_description']).agg(
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus')
    ).sort('start_number', descending=False)

    # Get the list of pitch types ordered by frequency
    sorted_value_counts = df['pitch_type'].value_counts().sort('count', descending=True)
    items_in_order = sorted_value_counts['pitch_type'].to_list()

    # The full list of game numbers is the same for every pitch type.
    all_starts = pl.DataFrame({"start_number": list(date_to_number.values())})

    # Stays None when there are no pitch types, so the x-limit step below can
    # be skipped instead of raising NameError.
    df_item = None

    # Plot the rolling average for each pitch type
    for i in items_in_order:
        df_item = plot_game_roll.filter(pl.col('pitch_type') == i)
        df_item = df_item.with_columns(
            pl.col("start_number").cast(pl.Int64)
        ).join(
            all_starts,
            on="start_number",
            how="outer"
        ).sort("start_number_right").with_columns([
            pl.col("start_number").fill_null(strategy="forward").fill_null(strategy="backward"),
            pl.col("tj_stuff_plus").fill_null(strategy="forward").fill_null(strategy="backward"),
            pl.col("pitch_type").fill_null(strategy="forward").fill_null(strategy="backward"),
            pl.col("pitch_description").fill_null(strategy="forward").fill_null(strategy="backward")
        ])

        sns.lineplot(x=range(1, max(df_item['start_number_right']) + 1),
                     y=df_item.filter(pl.col('pitch_type') == i)['tj_stuff_plus'].rolling_mean(window, min_periods=1),
                     color=dict_colour[i],
                     ax=ax, linewidth=3)

        # Highlight missing game data points. The rolling mean and the null
        # mask are computed once per pitch type rather than once per row.
        rolled = df_item['tj_stuff_plus'].rolling_mean(window, min_periods=1)
        game_missing = df_item['game_id'].is_null()
        for n in range(len(df_item)):
            if game_missing[n]:
                sns.scatterplot(x=[df_item['start_number_right'][n]],
                                y=[rolled[n]],
                                color='white',
                                ec=dict_colour[i],
                                ax=ax,
                                zorder=100)

    # Adjust x-axis limits to start from 1.
    # NOTE(review): the upper bound comes from the last pitch type processed,
    # not from the total number of games - confirm this is intended.
    if df_item is not None:
        ax.set_xlim(1, max(df_item['start_number']))
    ax.set_ylim(70, 130)
    ax.set_xlabel('Games', fontdict=font_properties_axes)
    ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
    ax.set_title(f"{window} Game Rolling tjStuff+", fontdict=font_properties_titles)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
363
-
364
def break_plot(df: pl.DataFrame, ax: plt.Axes):
    """
    Plot the pitch breaks for different pitch types.

    Draws a shaded confidence ellipse per pitch type (only when that type has
    more than four rows), a scatter of every pitch coloured by type, zero
    reference lines, and glove-side / arm-side annotations. The handedness is
    taken from the first row of ``pitcher_hand``; for ``'L'`` the x axis is
    inverted.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. Reads ``pitch_count``,
        ``pitch_type``, ``hb``, ``ivb`` and ``pitcher_hand``.
    ax : plt.Axes
        The axis to plot on.
    """
    # Get unique pitch types sorted by pitch count
    label_labels = df.sort(by=['pitch_count', 'pitch_type'], descending=[False, True])['pitch_type'].unique(maintain_order=True).to_numpy()

    # Plot confidence ellipses for each pitch type
    for label in label_labels:
        subset = df.filter(pl.col('pitch_type') == label)
        if len(subset) <= 4:
            continue
        try:
            confidence_ellipse(subset['hb'], subset['ivb'], ax=ax, edgecolor=dict_colour[label], n_std=2, facecolor=dict_colour[label], alpha=0.2)
        except ValueError:
            # A failed ellipse only skips this pitch type; it used to return
            # from the function and leave the whole plot unfinished.
            continue

    pitcher_hand = df['pitcher_hand'][0]

    # Plot scatter plot for pitch breaks (the R and L calls were identical)
    if pitcher_hand in ('R', 'L'):
        sns.scatterplot(ax=ax, x=df['hb'], y=df['ivb'], hue=df['pitch_type'], palette=dict_colour, ec='black', alpha=1, zorder=2)

    # Set axis limits
    ax.set_xlim((-25, 25))
    ax.set_ylim((-25, 25))

    # Add horizontal and vertical lines
    ax.hlines(y=0, xmin=-50, xmax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)
    ax.vlines(x=0, ymin=-50, ymax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)

    # Set axis labels and title
    ax.set_xlabel('Horizontal Break (in)', fontdict=font_properties_axes)
    ax.set_ylabel('Induced Vertical Break (in)', fontdict=font_properties_axes)
    ax.set_title("Pitch Breaks", fontdict=font_properties_titles)

    # Remove legend; there is none when no scatter was drawn
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

    # Set tick labels
    ax.set_xticklabels(ax.get_xticks(), fontdict=font_properties)
    ax.set_yticklabels(ax.get_yticks(), fontdict=font_properties)

    # Add text annotations for glove side and arm side
    annotation_box = dict(facecolor='white', edgecolor='black')
    if pitcher_hand == 'R':
        ax.text(-24.5, -24.5, s='← Glove Side', fontstyle='italic', ha='left', va='bottom',
                bbox=annotation_box, fontsize=12, zorder=3)
        ax.text(24.5, -24.5, s='Arm Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=annotation_box, fontsize=12, zorder=3)
    elif pitcher_hand == 'L':
        ax.invert_xaxis()
        ax.text(24.5, -24.5, s='← Arm Side', fontstyle='italic', ha='left', va='bottom',
                bbox=annotation_box, fontsize=12, zorder=3)
        ax.text(-24.5, -24.5, s='Glove Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=annotation_box, fontsize=12, zorder=3)

    # Set aspect ratio and format axis ticks
    ax.set_aspect('equal', adjustable='box')
    ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
    ax.yaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
430
-
431
- # DEFINE STRIKE ZONE
432
# Closed rectangular outline (first point repeated last) consumed by draw_line.
strike_zone = pl.DataFrame(
    {
        'PlateLocSide': [-0.9, -0.9, 0.9, 0.9, -0.9],
        'PlateLocHeight': [1.5, 3.5, 3.5, 1.5, 1.5],
    }
)
436
-
437
- ### STRIKE ZONE ###
438
def draw_line(axis, alpha_spot=1, catcher_p=True):
    """
    Draw the strike zone outline and a home-plate shape on an axis.

    Parameters
    ----------
    axis : matplotlib.axes.Axes
        The axis to draw on.
    alpha_spot : float, optional
        Transparency applied to every line (default is 1).
    catcher_p : bool, optional
        Selects which of the two home-plate outlines is drawn: the catcher's
        perspective when True (default), the pitcher's perspective otherwise.
    """
    # Strike zone outline from the module-level strike_zone frame
    zone_x = strike_zone['PlateLocSide'].to_list()
    zone_y = strike_zone['PlateLocHeight'].to_list()
    axis.plot(zone_x, zone_y, color='black', linewidth=1.3, zorder=3, alpha=alpha_spot)

    # Home plate as five straight edges, each given as (x pair, y pair)
    if catcher_p:
        plate_edges = [
            ([-0.708, 0.708], [0.15, 0.15]),
            ([-0.708, -0.708], [0.15, 0.3]),
            ([-0.708, 0], [0.3, 0.5]),
            ([0, 0.708], [0.5, 0.3]),
            ([0.708, 0.708], [0.3, 0.15]),
        ]
    else:
        plate_edges = [
            ([-0.708, 0.708], [0.4, 0.4]),
            ([-0.708, -0.9], [0.4, -0.1]),
            ([-0.9, 0], [-0.1, -0.35]),
            ([0, 0.9], [-0.35, -0.1]),
            ([0.9, 0.708], [-0.1, 0.4]),
        ]

    for edge_x, edge_y in plate_edges:
        axis.plot(edge_x, edge_y, color='black', linewidth=1, alpha=alpha_spot, zorder=1)
469
-
470
def location_plot(df: pl.DataFrame, ax: plt.Axes, hand: str):
    """
    Plot mean pitch locations per pitch type against one batter hand.

    Parameters
    ----------
    df : pl.DataFrame
        Pitch data; reads ``pitch_count``, ``pitch_type``, ``batter_hand``,
        ``start_speed``, ``px`` and ``pz``.
    ax : plt.Axes
        The axis to plot on.
    hand : str
        Value matched against ``batter_hand`` (e.g. 'L' or 'R'); also used in
        the title.
    """
    # Pitch types in the order they appear after sorting by pitch count
    ordered_types = (
        df.sort(by=['pitch_count', 'pitch_type'], descending=[False, True])['pitch_type']
        .unique(maintain_order=True)
        .to_numpy()
    )

    # One shaded ellipse per pitch type with at least five pitches to this hand
    for pitch_type in ordered_types:
        sample = df.filter((pl.col('pitch_type') == pitch_type) & (pl.col('batter_hand') == hand))
        if len(sample) < 5:
            continue
        shade = dict_colour[pitch_type]
        confidence_ellipse(sample['px'], sample['pz'], ax=ax, edgecolor=shade, n_std=1.5, facecolor=shade, alpha=0.3)

    # Per pitch type: pitch count and mean location against this hand
    versus_hand = df.filter(pl.col("batter_hand") == hand)
    pitch_location_group = versus_hand.group_by("pitch_type").agg([
        pl.col("start_speed").count().alias("pitches"),
        pl.col("px").mean().alias("px"),
        pl.col("pz").mean().alias("pz")
    ])

    # Share of all pitches to this hand; it drives the marker size below
    pitches_seen = pitch_location_group['pitches'].sum()
    usage_share = (pl.col("pitches") / pitches_seen).alias("pitch_percent")
    pitch_location_group = pitch_location_group.with_columns(usage_share)

    # One marker per pitch type at its mean location
    sns.scatterplot(
        ax=ax,
        x=pitch_location_group['px'],
        y=pitch_location_group['pz'],
        hue=pitch_location_group['pitch_type'],
        palette=dict_colour,
        ec='black',
        s=pitch_location_group['pitch_percent'] * 750,
        linewidth=2,
        zorder=2,
    )

    # Square frame, strike zone overlay, hidden axes, fixed limits
    ax.axis('square')
    draw_line(ax, alpha_spot=0.75, catcher_p=False)
    ax.axis('off')
    ax.set_xlim((-2.75, 2.75))
    ax.set_ylim((-0.5, 5))

    # The legend is only removed when at least one pitch type was plotted
    if len(pitch_location_group['px']) > 0:
        ax.get_legend().remove()
    ax.grid(False)
    ax.set_title(f"Pitch Locations vs {hand}HB\n{pitch_location_group['pitches'].sum()} Pitches", fontdict=font_properties_titles)
524
-
525
-
526
def summary_table(df: pl.DataFrame, ax: plt.Axes):
    """
    Create a summary table of pitch data.

    Builds one row per ``pitch_description`` plus a final "All" row, renders
    them as a matplotlib table on ``ax``, colours selected cells, rewrites the
    header labels, formats the numeric cells and adds a pitch-type legend.

    Cell colouring reads the raw (unformatted) cell text, so it must run
    before the formatting pass at the end of the function.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data.
    ax : plt.Axes
        The axis to plot the table on.
    """
    # Aggregate pitch data by pitch description
    df_agg = df.group_by("pitch_description").agg(
        pl.col('is_pitch').sum().alias('count'),
        (pl.col('is_pitch').sum() / df.select(pl.col('is_pitch').sum())).alias('count_percent'),
        pl.col('start_speed').mean().alias('start_speed'),
        pl.col('ivb').mean().alias('ivb'),
        pl.col('hb').mean().alias('hb'),
        pl.col('spin_rate').mean().alias('spin_rate'),
        pl.col('vaa').mean().alias('vaa'),
        pl.col('haa').mean().alias('haa'),
        pl.col('z0').mean().alias('z0'),
        pl.col('x0').mean().alias('x0'),
        pl.col('extension').mean().alias('extension'),
        # Mean spin direction turned into an "H:MM" string: the value is
        # shifted by 180 and wrapped to 0-360, each 30 units count as one
        # hour, and the remainder is mapped to a fraction of an hour.
        (((pl.col('spin_direction').mean() + 180) % 360 // 30) +
        (((pl.col('spin_direction').mean() + 180) % 360 % 30 / 30 / 100 * 60).round(2) * 10).round(0) // 1.5 / 4)
        .cast(pl.Float64).map_elements(lambda x: f"{int(x)}:{int((x % 1) * 60):02d}", return_dtype=pl.Utf8).alias('clock_time'),
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
        pl.col('pitch_grade').mean().alias('pitch_grade'),
        (pl.col('in_zone').sum() / pl.col('is_pitch').sum()).alias('zone_percent'),
        (pl.col('ozone_swing').sum() / pl.col('out_zone').sum()).alias('chase_percent'),
        (pl.col('whiffs').sum() / pl.col('swings').sum()).alias('whiff_percent'),
        (pl.col('woba_pred_contact').sum() / pl.col('bip').sum()).alias('xwobacon')
    ).sort("count", descending=True)

    # Aggregate all pitch data into a single "All" row with the same columns;
    # the per-pitch-type-only measures are left null.
    df_agg_all = df.group_by(pl.lit("All").alias("pitch_description")).agg(
        pl.col('is_pitch').sum().alias('count'),
        (pl.col('is_pitch').sum() / df.select(pl.col('is_pitch').sum())).alias('count_percent'),
        pl.lit(None).alias('start_speed'),
        pl.lit(None).alias('ivb'),
        pl.lit(None).alias('hb'),
        pl.lit(None).alias('spin_rate'),
        pl.lit(None).alias('vaa'),
        pl.lit(None).alias('haa'),
        pl.lit(None).alias('z0'),
        pl.lit(None).alias('x0'),
        pl.col('extension').mean().alias('extension'),
        pl.lit(None).alias('clock_time'),
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
        pl.lit(None).alias('pitch_grade'),
        (pl.col('in_zone').sum() / pl.col('is_pitch').sum()).alias('zone_percent'),
        (pl.col('ozone_swing').sum() / pl.col('out_zone').sum()).alias('chase_percent'),
        (pl.col('whiffs').sum() / pl.col('swings').sum()).alias('whiff_percent'),
        (pl.col('woba_pred_contact').sum() / pl.col('bip').sum()).alias('xwobacon')
    )

    # Concatenate aggregated data; the "All" row ends up last
    df_agg = pl.concat([df_agg, df_agg_all]).fill_nan(None)

    # Load statcast pitch summary data (reference values used for colouring)
    statcast_pitch_summary = pl.read_csv('functions/statcast_2024_grouped.csv')

    # Create table; missing values are shown as an em dash. colWidths has 19
    # entries, one per column of df_agg.
    table = ax.table(cellText=df_agg.fill_nan('—').fill_null('—').to_numpy(), colLabels=df_agg.columns, cellLoc='center',
                     colWidths=[2.3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], bbox=[0.0, 0, 1, 0.8])

    # Set table properties
    min_font_size = 14
    table.auto_set_font_size(False)
    table.set_fontsize(min_font_size)
    table.scale(1, 0.5)

    # Set font size for values (every row except header row 0)
    min_font_size = 18
    for i in range(len(df_agg) + 1):
        for j in range(len(df_agg.columns)):
            if i > 0:  # Skip the header row
                cell = table.get_celld()[i, j]
                cell.set_fontsize(min_font_size)

    # Define color maps; these locals shadow the module-level cmap_sum, and
    # cmap_sum_r is the same three colours in reverse order.
    cmap_sum = mcolors.LinearSegmentedColormap.from_list("", ['#648FFF', '#FFFFFF', '#FFB000'])
    cmap_sum_r = mcolors.LinearSegmentedColormap.from_list("", ['#FFB000', '#FFFFFF', '#648FFF'])

    # Update table cells with colors and text properties.
    # df_agg row i corresponds to table row i + 1 because row 0 is the header.
    for i in range(len(df_agg)):
        pitch_check = dict_pitch_desc_type[df_agg['pitch_description'][i]]
        cell_text = table.get_celld()[(i + 1, 0)].get_text().get_text()

        if cell_text != 'All':
            # Name cell gets the pitch colour; black text for the names in the
            # list below, white for everything else.
            table.get_celld()[(i + 1, 0)].set_facecolor(dict_pitch_name[cell_text])
            text_props = {'color': '#000000', 'fontweight': 'bold'} if cell_text in ['Split-Finger', 'Slider', 'Changeup'] else {'color': '#ffffff', 'fontweight': 'bold'}
            table.get_celld()[(i + 1, 0)].set_text_props(**text_props)
            if cell_text == 'Four-Seam Fastball':
                table.get_celld()[(i + 1, 0)].get_text().set_text('4-Seam')

        # Reference rows for this pitch type from the statcast summary
        select_df = statcast_pitch_summary.filter(statcast_pitch_summary['pitch_type'] == pitch_check)

        # Apply color to specific columns based on normalized values.
        # Each tuple is (table column index, statcast column or None, low, high):
        # with a statcast column, low/high are multipliers of that column's
        # mean; with None they are used directly as the colour-scale bounds.
        columns_to_color = [(3, 'release_speed', 0.95, 1.05), (11, 'release_extension', 0.9, 1.1), (13, None, 80, 120),
                            (14, None, 30, 70), (15, 'in_zone_rate', 0.7, 1.3), (16, 'chase_rate', 0.7, 1.3),
                            (17, 'whiff_rate', 0.7, 1.3), (18, 'xwoba', 0.7, 1.3)]

        for col, stat, vmin_factor, vmax_factor in columns_to_color:
            cell_value = table.get_celld()[(i + 1, col)].get_text().get_text()
            if cell_value != '—':
                vmin = select_df[stat].mean() * vmin_factor if stat else vmin_factor
                vmax = select_df[stat].mean() * vmax_factor if stat else vmax_factor
                normalize = mcolors.Normalize(vmin=vmin, vmax=vmax)
                # Column 18 is the only one coloured with the reversed map
                cmap = cmap_sum if col != 18 else cmap_sum_r
                table.get_celld()[(i + 1, col)].set_facecolor(get_color(float(cell_value.strip('%')), normalize, cmap))

    # Bold black text for the first cell of table row len(df_agg), i.e. the
    # last body row (the concatenated "All" row), not the header row.
    table.get_celld()[(len(df_agg), 0)].set_text_props(color='#000000', fontweight='bold')

    # Update column names (mathtext bold labels, in df_agg column order)
    new_column_names = ['$\\bf{Pitch\\ Name}$', '$\\bf{Count}$', '$\\bf{Pitch\\%}$', '$\\bf{Velocity}$', '$\\bf{iVB}$',
                        '$\\bf{HB}$', '$\\bf{Spin}$', '$\\bf{VAA}$', '$\\bf{HAA}$', '$\\bf{vRel}$', '$\\bf{hRel}$',
                        '$\\bf{Ext.}$', '$\\bf{Axis}$', '$\\bf{tjStuff+}$', '$\\bf{Grade}$', '$\\bf{Zone\\%}$',
                        '$\\bf{Chase\\%}$', '$\\bf{Whiff\\%}$', '$\\bf{xwOBA}$\n$\\bf{Contact}$']

    for i, col_name in enumerate(new_column_names):
        table.get_celld()[(0, i)].get_text().set_text(col_name)

    # Format cell values
    def format_cells(columns, fmt):
        # Re-render every non-placeholder body cell of the named columns by
        # parsing its current text as a float and applying the format string.
        for col in columns:
            col_idx = df_agg.columns.index(col)
            for row in range(1, len(df_agg) + 1):
                cell_value = table.get_celld()[(row, col_idx)].get_text().get_text()
                if cell_value != '—':
                    table.get_celld()[(row, col_idx)].get_text().set_text(fmt.format(float(cell_value.strip('%'))))

    format_cells(['start_speed', 'ivb', 'hb', 'vaa', 'haa', 'z0', 'x0', 'extension'], '{:,.1f}')
    format_cells(['xwobacon'], '{:,.3f}')
    format_cells(['count_percent', 'zone_percent', 'chase_percent', 'whiff_percent'], '{:,.1%}')
    format_cells(['tj_stuff_plus', 'pitch_grade', 'spin_rate'], '{:,.0f}')

    # Create legend for pitch types using empty scatter handles
    items_in_order = (df.sort("pitch_count", descending=True)['pitch_type'].unique(maintain_order=True).to_numpy())
    colour_pitches = [dict_colour[x] for x in items_in_order]
    label = [dict_pitch[x] for x in items_in_order]
    handles = [plt.scatter([], [], color=color, marker='o', s=100) for color in colour_pitches]
    # More than five labels: same layout with a smaller font and marker scale
    if len(label) > 5:
        ax.legend(handles, label, bbox_to_anchor=(0.1, 0.81, 0.8, 0.14), ncol=5,
                  fancybox=True, loc='lower center', fontsize=16, framealpha=1.0, markerscale=1.7, prop={'family': 'calibi', 'size': 16})
    else:
        ax.legend(handles, label, bbox_to_anchor=(0.1, 0.81, 0.8, 0.14), ncol=5,
                  fancybox=True, loc='lower center', fontsize=20, framealpha=1.0, markerscale=2, prop={'family': 'calibi', 'size': 20})
    ax.axis('off')
677
-
678
def plot_footer(ax: plt.Axes):
    """
    Write the credit, explanatory notes and data-source text on an axis.

    Parameters
    ----------
    ax : plt.Axes
        The axis that receives the footer text; its frame is hidden afterwards.
    """
    explanation = '''
Colour Coding Compares to League Average By Pitch
tjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type
tjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10
Pitch Grade scales tjStuff+ to the traditional 20-80 Scouting Scale for a given pitch type
'''
    sources = 'Data: MLB, Fangraphs\nImages: MLB, ESPN'

    # Left: author credit. Centre: notes. Right: data and image sources.
    ax.text(0, 1, 'By: @TJStats', ha='left', va='top', fontsize=24)
    ax.text(0.5, 0.25, explanation, ha='center', va='bottom', fontsize=12)
    ax.text(1, 1, sources, ha='right', va='top', fontsize=24)
    ax.axis('off')
699
-
700
- # Function to get an image from a URL and display it on the given axis
701
def player_headshot(player_input: str, ax: plt.Axes, sport_id: int, season: int):
    """
    Display the player's headshot image on the given axis.

    The image is downloaded from img.mlbstatic.com. When the download fails
    or the response is not a readable image, the axis is simply hidden.

    Parameters
    ----------
    player_input : str
        The player's ID, interpolated into the image URL.
    ax : plt.Axes
        The axis to display the image on.
    sport_id : int
        The sport ID (1 for MLB, other for minor leagues). Anything accepted
        by ``int()`` works, e.g. the string ``'1'``.
    season : int
        The season year. Not used by this function.
    """
    # Normalise once so the URL choice and the image extent always agree; the
    # extent check used to compare the raw value and missed a string '1'.
    is_mlb = int(sport_id) == 1

    # Construct the URL for the player's headshot image based on sport ID
    if is_mlb:
        url = f'https://img.mlbstatic.com/mlb-photos/image/upload/d_people:generic:headshot:67:current.png/w_640,q_auto:best/v1/people/{player_input}/headshot/silo/current.png'
    else:
        url = f'https://img.mlbstatic.com/mlb-photos/image/upload/c_fill,g_auto/w_640/v1/people/{player_input}/headshot/milb/current.png'

    try:
        # The timeout keeps a stalled connection from hanging the whole figure.
        response = requests.get(url, timeout=10)
        img = Image.open(BytesIO(response.content))
    except (requests.RequestException, PIL.UnidentifiedImageError):
        # No usable image: leave the slot empty.
        ax.axis('off')
        return

    # Display the image on the axis; the non-MLB image is drawn narrower
    ax.set_xlim(0, 1.3)
    ax.set_ylim(0, 1)
    ax.imshow(img, extent=[0, 1, 0, 1] if is_mlb else [1/6, 5/6, 0, 1], origin='upper')

    # Turn off the axis
    ax.axis('off')
737
-
738
def player_bio(pitcher_id: str, ax: plt.Axes, sport_id: int, year_input: int):
    """
    Display the player's bio information on the given axis.

    Two requests are made to statsapi.mlb.com: one for the player record and
    one for the list of sports, from which the abbreviation matching
    ``sport_id`` is read.

    Parameters
    ----------
    pitcher_id : str
        The player's ID.
    ax : plt.Axes
        The axis to display the bio information on.
    sport_id : int
        The sport ID (1 for MLB, other for minor leagues). Anything that
        ``int()`` accepts is allowed.
    year_input : int
        The season year.

    Raises
    ------
    KeyError, IndexError
        If the API responses lack the expected fields or no sport matches
        ``sport_id``.
    """
    # Fetch the player record and keep only the first (requested) person
    url = f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}&hydrate=currentTeam"
    person = requests.get(url).json()['people'][0]

    # Extract player information from the JSON data
    player_name = person['fullName']
    pitcher_hand = person['pitchHand']['code']
    age = person['currentAge']
    height = person['height']
    weight = person['weight']

    # Display the player's name, handedness, age, height, and weight on the axis
    ax.text(0.5, 1, f'{player_name}', va='top', ha='center', fontsize=56)
    ax.text(0.5, 0.7, f'{pitcher_hand}HP, Age:{age}, {height}/{weight}', va='top', ha='center', fontsize=30)
    ax.text(0.5, 0.45, 'Season Pitching Summary', va='top', ha='center', fontsize=40)

    # Make API call to retrieve sports information
    response = requests.get(url='https://statsapi.mlb.com/api/v1/sports').json()

    # Look the abbreviation up by numeric ID; int() keeps the comparison
    # valid when the sport ID arrives as a string (player_headshot accepts
    # that spelling too).
    df_sport_id = pl.DataFrame(response['sports'])
    abb = df_sport_id.filter(pl.col('id') == int(sport_id))['abbreviation'][0]

    # Display the season and sport abbreviation
    ax.text(0.5, 0.20, f'{year_input} {abb} Season', va='top', ha='center', fontsize=30, fontstyle='italic')

    # Turn off the axis
    ax.axis('off')
783
-
784
def plot_logo(pitcher_id: str, ax: plt.Axes, df_team: pl.DataFrame, df_players: pl.DataFrame):
    """
    Display the team logo for the given pitcher on the specified axis.

    The pitcher's team is resolved to its parent organisation's abbreviation
    and the matching ESPN logo is downloaded. If the pitcher, team or logo
    cannot be resolved, or the download is not a decodable image, the axis
    is switched off and left blank.

    Parameters
    ----------
    pitcher_id : str
        The ID of the pitcher.
    ax : plt.Axes
        The axis to display the logo on.
    df_team : pl.DataFrame
        Team data; the columns ``team_id``, ``parent_org_id`` and
        ``parent_org_abbreviation`` are read.
    df_players : pl.DataFrame
        Player data; the columns ``player_id`` and ``team`` are read.
    """
    # Team abbreviation -> file name used by ESPN's logo service.
    # 'ATH' deliberately reuses the 'oak' logo.
    team_slugs = {
        'AZ': 'ari', 'ATL': 'atl', 'BAL': 'bal', 'BOS': 'bos', 'CHC': 'chc',
        'CWS': 'chw', 'CIN': 'cin', 'CLE': 'cle', 'COL': 'col', 'DET': 'det',
        'HOU': 'hou', 'KC': 'kc', 'LAA': 'laa', 'LAD': 'lad', 'MIA': 'mia',
        'MIL': 'mil', 'MIN': 'min', 'NYM': 'nym', 'NYY': 'nyy', 'OAK': 'oak',
        'PHI': 'phi', 'PIT': 'pit', 'SD': 'sd', 'SF': 'sf', 'SEA': 'sea',
        'STL': 'stl', 'TB': 'tb', 'TEX': 'tex', 'TOR': 'tor', 'WSH': 'wsh',
        'ATH': 'oak',
    }
    image_dict = {
        team: f'https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/{slug}.png&h=500&w=500'
        for team, slug in team_slugs.items()
    }

    try:
        # Get the team ID for the given pitcher
        team_id = df_players.filter(pl.col('player_id') == pitcher_id)['team'][0]

        # Fetch the team record from the MLB API
        url_team = f'https://statsapi.mlb.com/api/v1/teams/{team_id}'
        team_info = requests.get(url_team).json()['teams'][0]

        # A team that is itself a parent organisation is looked up by its own
        # ID; any other team is looked up through its parentOrgId.
        if team_info['id'] in df_team['parent_org_id']:
            team_abb = df_team.filter(pl.col('team_id') == team_info['id'])['parent_org_abbreviation'][0]
        else:
            team_abb = df_team.filter(pl.col('parent_org_id') == team_info['parentOrgId'])['parent_org_abbreviation'][0]

        # Download the logo and open it from the response content
        logo_url = image_dict[team_abb]
        response = requests.get(logo_url)
        img = Image.open(BytesIO(response.content))

        # Display the image on the right-hand part of the axis
        ax.set_xlim(0, 1.3)
        ax.set_ylim(0, 1)
        ax.imshow(img, extent=[0.3, 1.3, 0, 1], origin='upper')

        # Turn off the axis
        ax.axis('off')
    except (KeyError, IndexError):
        # Unknown pitcher, team or abbreviation: leave the panel blank.
        ax.axis('off')
        return
    except PIL.UnidentifiedImageError:
        # Same fallback player_headshot uses when the download is not an image.
        ax.axis('off')
        return
873
-
874
# Split name -> integer code.
# NOTE(review): what 13 and 14 stand for is not visible in this part of the
# file (presumably an external API's split identifiers) - confirm.
splits = {'all': 0, 'left': 13, 'right': 14}

# Split name -> suffix appended to the date-range title above the stats table.
splits_title = {'all': '', 'left': ' vs LHH', 'right': ' vs RHH'}
887
-
888
-
889
def fangraphs_pitching_leaderboards(season: int,
                                    split: str,
                                    start_date: str = '2024-01-01',
                                    end_date: str = '2024-12-31'):
    """
    Fetch pitching leaderboards data from Fangraphs.

    Parameters
    ----------
    season : int
        The season year.
    split : str
        Accepted for interface compatibility; it is not used when building
        the request.
    start_date : str, optional
        The start date for the data (default is '2024-01-01').
    end_date : str, optional
        The end date for the data (default is '2024-12-31').
        NOTE: both date defaults are fixed to 2024 regardless of ``season``;
        pass explicit dates for other seasons.

    Returns
    -------
    pl.DataFrame
        The DataFrame built from the ``data`` list of the JSON response.
    """
    # Adjacent literals keep the query string free of the line breaks and
    # indentation that a triple-quoted f-string would embed in the URL.
    url = (
        "https://www.fangraphs.com/api/leaders/major-league/data"
        f"?age=&pos=all&stats=pit&lg=all&season={season}&season1={season}"
        f"&startdate={start_date}&enddate={end_date}"
        "&ind=0&qual=0&type=8&month=1000&pageitems=500000"
    )

    data = requests.get(url).json()
    df = pl.DataFrame(data=data['data'], infer_schema_length=1000)
    return df
920
-
921
def fangraphs_splits_scrape(player_input: str, year_input: int, start_date: str, end_date: str, split: str) -> pl.DataFrame:
    """
    Pull one player's Fangraphs splits line for a date range.

    The player's Fangraphs ID is resolved from the full-season pitching
    leaderboard, then the splits endpoint is queried twice (``strType`` "2",
    then "1") and the two result rows are merged into a single record.

    Parameters
    ----------
    player_input : str
        The player's ID, matched against the leaderboard's ``xMLBAMID`` column.
    year_input : int
        The season year.
    start_date : str
        The start date for the data.
    end_date : str
        The end date for the data.
    split : str
        The split type: 'all', 'left' or 'right'.

    Returns
    -------
    pl.DataFrame
        The DataFrame containing the merged splits data.
    """
    endpoint = "https://www.fangraphs.com/api/leaders/splits/splits-leaders"
    json_headers = {'Content-Type': 'application/json'}

    # Split name -> value sent as strSplitArr
    split_codes = {
        'all': [],
        'left': ['5'],
        'right': ['6'],
    }

    # Resolve the Fangraphs player ID from the full-season leaderboard
    leaderboard = fangraphs_pitching_leaderboards(
        year_input,
        split='All',
        start_date=f'{year_input}-01-01',
        end_date=f'{year_input}-12-31',
    )
    player_rows = leaderboard.filter(pl.col('xMLBAMID') == player_input)
    fg_id = str(player_rows['playerid'][0])

    # Request body for the first query
    basic_payload = {
        "strPlayerId": fg_id,
        "strSplitArr": split_codes[split],
        "strGroup": "season",
        "strPosition": "P",
        "strType": "2",
        "strStartDate": pd.to_datetime(start_date).strftime('%Y-%m-%d'),
        "strEndDate": pd.to_datetime(end_date).strftime('%Y-%m-%d'),
        "strSplitTeams": False,
        "dctFilters": [],
        "strStatType": "player",
        "strAutoPt": False,
        "arrPlayerId": [],
        "strSplitArrPitch": [],
        "arrWxTemperature": None,
        "arrWxPressure": None,
        "arrWxAirDensity": None,
        "arrWxElevation": None,
        "arrWxWindSpeed": None,
    }
    # The second query differs only in strType
    advanced_payload = {**basic_payload, "strType": "1"}

    # Query in the original order so that values from the second response
    # overwrite same-named keys from the first.
    merged_row = {}
    for body in (basic_payload, advanced_payload):
        reply = requests.post(endpoint, data=json.dumps(body), headers=json_headers)
        merged_row.update(reply.json()['data'][0])

    return pl.DataFrame(merged_row)
1000
-
1001
-
1002
def fangraphs_table(df: pl.DataFrame,
                    ax: plt.Axes,
                    player_input: str,
                    season: int,
                    split: str):
    """
    Create a table of Fangraphs pitching data for a specific player.

    Parameters
    ----------
    df : pl.DataFrame
        Pitch data; the first and last ``game_date`` values are used as the
        date range (assumes the rows are in date order - TODO confirm).
    ax : plt.Axes
        The axis to plot the table on.
    player_input : str
        The player's ID.
    season : int
        The season year.
    split : str
        The split type: 'all', 'left' or 'right' (case-insensitive).
    """
    # The lookup tables are keyed in lower case; accept any capitalisation
    # (e.g. 'All') instead of failing with a KeyError.
    split = split.lower()

    start_date = df['game_date'][0]
    end_date = df['game_date'][-1]

    # Fetch Fangraphs splits data
    df_fangraphs = fangraphs_splits_scrape(player_input=player_input,
                                           year_input=season,
                                           start_date=start_date,
                                           end_date=end_date,
                                           split=split)

    # Select relevant columns for the table
    plot_table = df_fangraphs.select(['IP', 'WHIP', 'ERA', 'TBF', 'FIP', 'K%', 'BB%', 'K-BB%'])

    # Format table values; the '---' placeholder is passed through untouched
    plot_table_values = []
    for col in plot_table.columns:
        value = plot_table[col][0]
        if value == '---':
            plot_table_values.append('---')
        else:
            plot_table_values.append(format(value, fangraphs_stats_dict[col]['format']))

    # Create the table
    table_fg = ax.table(cellText=[plot_table_values], colLabels=plot_table.columns, cellLoc='center',
                        bbox=[0.0, 0.1, 1, 0.7])

    # Set font size for the table
    min_font_size = 20
    table_fg.set_fontsize(min_font_size)

    # Replace the raw column names in the header row with formatted headers
    new_column_names = [fangraphs_stats_dict[col]['table_header'] for col in plot_table.columns]
    for i, col_name in enumerate(new_column_names):
        table_fg.get_celld()[(0, i)].get_text().set_text(col_name)

    # Date-range title above the table
    ax.text(0.5, 0.9, f'{start_date} to {end_date}{splits_title[split]}', va='bottom', ha='center',
            fontsize=36, fontstyle='italic')
    ax.axis('off')
1053
-
1054
-
1055
def stat_summary_table(df: pl.DataFrame,
                       player_input: int,
                       sport_id: int,
                       ax: plt.Axes,
                       split: str = 'All',
                       game_type: list = ['R']):
    """
    Create a summary table of player statistics.

    Three layouts exist:

    * all pitches come from one game -> single-game box-score line;
    * minor-league data, or spring/playoff games -> rate-stat line built from
      the MLB stats API;
    * otherwise (MLB, multiple games, not spring/playoffs) -> delegated to
      ``fangraphs_table``.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. The columns ``game_date``,
        ``game_id``, ``batter_team`` and ``is_whiff`` are read.
    player_input : int
        The player's ID.
    sport_id : int
        The sport ID (1 for MLB, other for minor leagues). Anything that
        ``int()`` accepts is allowed.
    ax : plt.Axes
        The axis to plot the table on.
    split : str, optional
        The split type: 'all', 'left' or 'right', case-insensitive
        (default is 'All').
    game_type : list, optional
        Game type codes sent to the API (default is ['R']); the first entry
        selects the title label. The list is only read, never modified.
    """
    type_dict = {'R': 'Regular Season',
                 'S': 'Spring',
                 'P': 'Playoffs'}

    split_title = {
        'all': '',
        'right': ' vs RHH',
        'left': ' vs LHH',
    }

    # The title table is keyed in lower case while the default is 'All'.
    split = split.lower()
    # Normalise so a string sport ID such as '1' is still recognised as MLB.
    is_mlb = int(sport_id) == 1
    single_game = df['game_id'][0] == df['game_id'][-1]

    # MLB multi-game summaries are drawn from Fangraphs; decide this before
    # calling the MLB stats API, whose response that layout never uses.
    if not single_game and is_mlb and game_type[0] not in ['S', 'P']:
        fangraphs_table(df=df, ax=ax, player_input=player_input, season=int(df['game_date'][0][0:4]), split=split)
        return

    # Format start and end dates
    start_date_format = str(pd.to_datetime(df['game_date'][0]).strftime('%m/%d/%Y'))
    end_date_format = str(pd.to_datetime(df['game_date'][-1]).strftime('%m/%d/%Y'))

    # Determine app context based on sport ID
    appContext = 'majorLeague' if is_mlb else 'minorLeague'

    game_type_str = ','.join([str(x) for x in game_type])

    # Fetch player stats from MLB API
    stats_url = f'https://statsapi.mlb.com/api/v1/people/{player_input}?appContext={appContext}&hydrate=stats(group=[pitching],type=[byDateRange],sportId={sport_id},startDate={start_date_format},endDate={end_date_format},gameType=[{game_type_str}])'
    pitcher_stats_call = requests.get(stats_url).json()

    # The last split of the first stats group holds the stat name -> value
    # mapping, which becomes a one-row DataFrame.
    stat_line = pitcher_stats_call['people'][0]['stats'][0]['splits'][-1]['stat']
    pitcher_stats_call_df = pl.DataFrame(data=dict(stat_line))

    def _percent(numerator, denominator, name):
        # '+' appends the sign to every value; `str.concat` would instead
        # join the rows of the column and add nothing to a one-row frame.
        return ((numerator / denominator * 100).round(1).cast(pl.Utf8) + pl.lit('%')).alias(name)

    strikeouts = pl.col('strikeOuts')
    walks = pl.col('baseOnBalls')
    batters = pl.col('battersFaced')

    # Add additional calculated columns
    pitcher_stats_call_df = pitcher_stats_call_df.with_columns(
        pl.lit(df['is_whiff'].sum()).alias('whiffs'),
        _percent(strikeouts, batters, 'k_percent'),
        _percent(walks, batters, 'bb_percent'),
        _percent(strikeouts - walks, batters, 'k_bb_percent'),
        # (13*HR + 3*(BB + HBP) - 2*K) / innings + 3.15, shown with two decimals
        (((pl.col('homeRuns') * 13 + 3 * (walks + pl.col('hitByPitch')) - 2 * strikeouts)) / (pl.col('outs') / 3) + 3.15).round(2).map_elements(lambda x: f"{x:.2f}").alias('fip'),
        _percent(pl.col('strikes'), pl.col('numberOfPitches'), 'strikePercentage'),
    )

    # Determine columns and title based on game count
    # (raw strings keep the mathtext backslashes without invalid escapes)
    if single_game:
        pitcher_stats_call_df_small = pitcher_stats_call_df.select(['inningsPitched', 'battersFaced', 'earnedRuns', 'hits', 'strikeOuts', 'baseOnBalls', 'hitByPitch', 'homeRuns', 'strikePercentage', 'whiffs'])
        new_column_names = [r'$\bf{IP}$', r'$\bf{PA}$', r'$\bf{ER}$', r'$\bf{H}$', r'$\bf{K}$', r'$\bf{BB}$', r'$\bf{HBP}$', r'$\bf{HR}$', r'$\bf{Strike\%}$', r'$\bf{Whiffs}$']
        title = f'{df["game_date"][0]} vs {df["batter_team"][0]} ({type_dict[game_type[0]]}){split_title[split]}'
    else:
        pitcher_stats_call_df_small = pitcher_stats_call_df.select(['inningsPitched', 'battersFaced', 'whip', 'era', 'fip', 'k_percent', 'bb_percent', 'k_bb_percent', 'strikePercentage'])
        new_column_names = [r'$\bf{IP}$', r'$\bf{PA}$', r'$\bf{WHIP}$', r'$\bf{ERA}$', r'$\bf{FIP}$', r'$\bf{K\%}$', r'$\bf{BB\%}$', r'$\bf{K-BB\%}$', r'$\bf{Strike\%}$']
        title = f'{df["game_date"][0]} to {df["game_date"][-1]} ({type_dict[game_type[0]]}{split_title[split]})'

    # Create and format the table
    table_fg = ax.table(cellText=pitcher_stats_call_df_small.to_numpy(), colLabels=pitcher_stats_call_df_small.columns, cellLoc='center', bbox=[0.0, 0.1, 1, 0.7])
    table_fg.set_fontsize(20)
    for i, col_name in enumerate(new_column_names):
        table_fg.get_celld()[(0, i)].get_text().set_text(col_name)

    # Add title to the plot
    ax.text(0.5, 0.9, title, va='bottom', ha='center', fontsize=36, fontstyle='italic')
    ax.axis('off')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
functions/statcast_2024_grouped.csv DELETED
@@ -1,19 +0,0 @@
1
- pitch_type,pitch,release_speed,pfx_z,pfx_x,release_spin_rate,release_pos_x,release_pos_z,release_extension,delta_run_exp,swing,whiff,in_zone,out_zone,chase,xwoba,pitch_usage,whiff_rate,in_zone_rate,chase_rate,delta_run_exp_per_100,all
2
- CH,74155,85.46226725895522,5.247514143364433,-3.9745011679246045,1803.342540762527,-0.5077629855663421,5.740925968432281,6.449406057002311,204.631,37385,11538,28912,45151,15250,0.28973564881286695,0.10218846333521206,0.30862645446034503,0.38988604949093114,0.3377555314389493,-0.27595037421616886,
3
- CS,22,66.38181818181819,-7.232727272727273,5.176363636363637,2039.2727272727273,-1.7981818181818183,6.5177272727272735,6.0636363636363635,-0.6290000000000001,9,2,10,12,2,0.13466666666666668,3.0316852449257168e-05,0.2222222222222222,0.45454545454545453,0.16666666666666666,2.85909090909091,
4
- CU,47579,79.40938533133989,-9.345106445703216,4.516206279348902,2568.8591051473077,-0.6765712059634863,5.9438438375202685,6.401792908519479,93.57199999999999,19910,6150,20751,26738,7749,0.28049767649520974,0.0655657055765094,0.3088900050226017,0.4361377918829736,0.28981225222529733,-0.1966665966077471,
5
- EP,576,50.51909722222222,16.357291666666665,-3.8287500000000003,1256.7152777777778,-0.9668749999999999,6.647100694444444,4.442013888888889,23.643,252,7,207,369,106,0.3971430703517588,0.0007937503186714604,0.027777777777777776,0.359375,0.2872628726287263,-4.104687500000001,
6
- FA,635,67.81354330708662,15.865511811023623,-3.7226456692913388,1674.0144694533763,-1.1163779527559055,6.317716535433071,4.92488188976378,15.495,284,29,296,339,73,0.43393490999999995,0.0008750546047853774,0.10211267605633803,0.46614173228346456,0.2153392330383481,-2.4401574803149604,
7
- FC,58379,89.56435813713696,8.08895396195288,1.5509243697478992,2389.231715947733,-0.9745362684951281,5.8461769002079365,6.403954996645393,-20.390000000000015,28753,6674,30002,28189,7757,0.34077822947428493,0.08044852405159929,0.23211490974854798,0.5139176758765994,0.2751782610238036,0.034926942907552404,
8
- FF,230412,94.27369496062718,15.720274827472318,-3.1074418968484365,2296.591789895323,-0.7685432927147252,5.821400777026439,6.524392110813926,-80.28400000000002,113157,24741,127386,102722,24808,0.3401256910065045,0.3175166639335565,0.21864312415493517,0.5528618301130149,0.2415062012032476,0.03484367133656234,
9
- FO,168,82.07916666666667,1.7357142857142858,0.1378571428571428,946.8154761904761,-0.5333333333333333,5.8914285714285715,6.666666666666667,2.539,89,29,60,108,43,0.27798747368421056,0.0002315105096125093,0.3258426966292135,0.35714285714285715,0.39814814814814814,-1.511309523809524,
10
- FS,21727,86.31228885718231,2.979608781700189,-8.76550651263405,1302.3992981808108,-1.4640824780227366,5.742066553136651,6.508958525345622,-16.641000000000005,11333,3906,7982,13745,4946,0.2548785060302361,0.02994064787113684,0.34465719579987647,0.3673769963639711,0.3598399417970171,0.07659133796658538,
11
- KC,11916,81.79965592480698,-9.370896273917422,4.895297079556898,2444.1642796967144,-0.8788083249412554,5.940037764350453,6.434007553503986,-12.997000000000003,5312,1860,4858,7058,2316,0.25845137325418993,0.016420709717515837,0.3501506024096386,0.40768714333669015,0.32813828279965995,0.10907183618663985,
12
- KN,971,76.94819773429454,-2.9453759011328526,-5.356498455200824,263.56326987681973,-1.2303398558187437,5.542131822863028,6.45653964984552,12.681,426,113,428,543,130,0.2870389181034483,0.0013380756240103959,0.2652582159624413,0.4407826982492276,0.23941068139963168,-1.3059732234809474,
13
- PO,55,91.24909090909091,13.11709090909091,-6.399272727272727,2195.3818181818183,-1.494181818181818,5.861272727272727,6.305454545454546,0.0,0,0,1,54,0,,7.579213112314292e-05,,0.01818181818181818,0.0,-0.0,
14
- SC,159,81.02264150943397,-3.1056603773584905,-8.001509433962264,2050.5974842767296,-1.0535849056603774,6.110377358490566,6.064150943396227,4.623,58,13,63,96,20,0.35349463636363637,0.0002191081608832677,0.22413793103448276,0.39622641509433965,0.20833333333333334,-2.9075471698113207,
15
- SI,116002,93.34805382235511,7.567078832293412,-6.148476070311284,2147.3631502060834,-0.7671983511070397,5.622119363257688,6.435364206296976,-32.837000000000025,53318,7390,65492,50222,12474,0.3501967420378125,0.15985525080994228,0.13860234817510034,0.5645764728194341,0.2483772052088726,0.028307270564300636,
16
- SL,116390,85.60138786052518,1.5759858803271631,2.7325110632802407,2435.5705519351436,-0.9811034007748601,5.761407576409815,6.433055359327349,-167.41500000000002,56606,19101,52478,63672,20396,0.2818607008786495,0.16038992984404735,0.337437727449387,0.45088065985050263,0.3203291870838045,0.14383967694819144,
17
- ST,43821,81.8580155633144,1.4796932977339632,7.821825152324228,2575.3661920073496,-1.080187124894457,5.4607240820611125,6.40352674793587,-52.96800000000001,20035,6276,19349,24472,7531,0.25978070794500324,0.0603870359626772,0.3132518093336661,0.44154629059126904,0.30773945733899966,0.12087355377558708,
18
- SV,2702,81.67483345669874,-4.788941524796447,7.356861584011844,2470.624858757062,-0.5779570688378979,5.420762398223538,6.227296392711045,0.19299999999999926,1117,339,1138,1564,479,0.2907683709923664,0.0037234606962678577,0.3034914950760967,0.42116950407105846,0.3062659846547315,-0.007142857142857115,
19
- All,725669,89.1521052747817,7.058379139422499,-1.2140087540219224,2255.6768252515376,-0.8282529777063689,5.758824349487279,6.456550518555369,-20.178000000000118,352163,89742,359413,365054,104080,0.3147037524825,1.0,0.25483085957354973,0.4952850404247667,0.28510850449522535,0.002780606585095976,all