James McCool committed on
Commit
872a007
·
1 Parent(s): d01aca2

introduced a button to recalculate diversity

Browse files
Files changed (2) hide show
  1. app.py +5 -1
  2. global_func/recalc_diversity.py +59 -0
app.py CHANGED
@@ -27,6 +27,7 @@ from global_func.analyze_player_combos import analyze_player_combos
27
  from global_func.stratification_function import stratification_function
28
  from global_func.exposure_spread import exposure_spread
29
  from global_func.reassess_edge import reassess_edge
 
30
 
31
  freq_format = {'Finish_percentile': '{:.2%}', 'Lineup Edge': '{:.2%}', 'Win%': '{:.2%}'}
32
  stacking_sports = ['MLB', 'NHL', 'NFL', 'LOL', 'NCAAF']
@@ -1829,7 +1830,7 @@ if selected_tab == 'Manage Portfolio':
1829
  st.session_state['export_file'][col] = st.session_state['export_file'][col].map(position_dict)
1830
 
1831
  if 'export_file' in st.session_state:
1832
- download_port, merge_port, partial_col, clear_export, blank_export_col = st.columns([1, 1, 1, 1, 8])
1833
  with download_port:
1834
  st.download_button(label="Download Portfolio", data=st.session_state['export_file'].to_csv(index=False), file_name="portfolio.csv", mime="text/csv")
1835
  with merge_port:
@@ -1851,6 +1852,9 @@ if selected_tab == 'Manage Portfolio':
1851
  st.session_state['display_frame'] = st.session_state['working_frame']
1852
  elif display_frame_source == 'Export Base':
1853
  st.session_state['display_frame'] = st.session_state['export_base']
 
 
 
1854
 
1855
  total_rows = len(st.session_state['display_frame'])
1856
  rows_per_page = 100
 
27
  from global_func.stratification_function import stratification_function
28
  from global_func.exposure_spread import exposure_spread
29
  from global_func.reassess_edge import reassess_edge
30
+ from global_func.recalc_diversity import recalc_diversity
31
 
32
  freq_format = {'Finish_percentile': '{:.2%}', 'Lineup Edge': '{:.2%}', 'Win%': '{:.2%}'}
33
  stacking_sports = ['MLB', 'NHL', 'NFL', 'LOL', 'NCAAF']
 
1830
  st.session_state['export_file'][col] = st.session_state['export_file'][col].map(position_dict)
1831
 
1832
  if 'export_file' in st.session_state:
1833
+ download_port, merge_port, partial_col, clear_export, recalc_div_col, blank_export_col = st.columns([1, 1, 1, 1, 1, 8])
1834
  with download_port:
1835
  st.download_button(label="Download Portfolio", data=st.session_state['export_file'].to_csv(index=False), file_name="portfolio.csv", mime="text/csv")
1836
  with merge_port:
 
1852
  st.session_state['display_frame'] = st.session_state['working_frame']
1853
  elif display_frame_source == 'Export Base':
1854
  st.session_state['display_frame'] = st.session_state['export_base']
1855
+ with recalc_div_col:
1856
+ if st.button("Recalculate Diversity"):
1857
+ st.session_state['display_frame']['Diversity'] = recalc_diversity(st.session_state['display_frame'], player_columns)
1858
 
1859
  total_rows = len(st.session_state['display_frame'])
1860
  rows_per_page = 100
global_func/recalc_diversity.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import time
5
+ import math
6
+ from difflib import SequenceMatcher
7
+
8
def recalc_diversity(portfolio, player_columns):
    """
    Score every lineup by how different it is from the rest of the portfolio.

    The values found in ``player_columns`` for a row are treated as that
    row's set of players. A row's raw score is its mean Jaccard distance
    (``1 - |A ∩ B| / |A ∪ B|``) to every *other* row. When the raw scores are
    not all equal they are min-max scaled to the 0-1 range; otherwise they
    are returned unscaled.

    Args:
        portfolio: DataFrame with one lineup per row.
        player_columns: List of column labels in ``portfolio`` holding the
            player entries of each lineup.

    Returns:
        1-D float NumPy array with one score per row, in row order. A
        portfolio with fewer than two rows has nothing to compare against,
        so all-zero scores (or an empty array) are returned.
    """
    n_rows = len(portfolio)
    if n_rows < 2:
        # No peers to compare with: avoid the 0/0 average (single row) and
        # the max()/min() of an empty array (no rows).
        return np.zeros(n_rows, dtype=float)

    slots = portfolio[player_columns]
    # Blank out missing values BEFORE casting to str. Casting first turns
    # NaN/None into the literal strings 'nan'/'None', which would then be
    # counted as a real player shared by every lineup with an empty slot.
    player_data = (
        slots.astype(object).where(slots.notna(), '').astype(str).to_numpy()
    )

    # One set of non-blank entries per row (whitespace-only cells are skipped).
    lineups = [{name for name in row if name.strip()} for row in player_data]

    # Stable player -> column index mapping.
    player_to_id = {
        player: idx
        for idx, player in enumerate(sorted(set().union(*lineups)))
    }

    # Row-by-player membership matrix (1 if the player is in the lineup).
    # int8 keeps the matrix small; pairwise intersection counts stay in range
    # as long as a lineup holds at most 127 distinct players.
    binary_matrix = np.zeros((n_rows, len(player_to_id)), dtype=np.int8)
    for i, lineup in enumerate(lineups):
        if lineup:
            binary_matrix[i, [player_to_id[name] for name in lineup]] = 1

    # Pairwise |A ∩ B| via a matrix product, |A ∪ B| via inclusion-exclusion.
    intersection_matrix = np.dot(binary_matrix, binary_matrix.T)
    row_sums = binary_matrix.sum(axis=1)
    union_matrix = row_sums[:, np.newaxis] + row_sums - intersection_matrix

    # Pairs whose union is empty keep similarity 0 (i.e. distance 1).
    jaccard_similarity = np.divide(
        intersection_matrix,
        union_matrix,
        out=np.zeros_like(intersection_matrix, dtype=float),
        where=union_matrix != 0,
    )
    jaccard_distance = 1 - jaccard_similarity

    # Drop self-comparisons, then average over the remaining n_rows - 1 peers.
    np.fill_diagonal(jaccard_distance, 0)
    diversity_scores = jaccard_distance.sum(axis=1) / (n_rows - 1)

    # Min-max normalise to 0-1; skipped when every score is identical.
    lowest = diversity_scores.min()
    score_range = diversity_scores.max() - lowest
    if score_range > 0:
        diversity_scores = (diversity_scores - lowest) / score_range

    return diversity_scores