import nfl_data_py.nfl_data_py as nfl import build import datetime as dt import numpy as np import io import pandas as pd pd.set_option('chained_assignment',None) pd.set_option('display.max_columns',None) import os import pickle as pkl import requests from bs4 import BeautifulSoup current_directory = os.path.dirname(os.path.abspath(__file__)) parent_directory = os.path.dirname(current_directory) data_directory = os.path.join(parent_directory, 'Data') pickle_directory = os.path.join(parent_directory, 'Pickles') # get team abbreviations file_path = os.path.join(pickle_directory, 'team_name_to_abbreviation.pkl') with open(file_path, 'rb') as f: team_name_to_abbreviation = pkl.load(f) file_path = os.path.join(pickle_directory, 'team_abbreviation_to_name.pkl') with open(file_path, 'rb') as f: team_abbreviation_to_name = pkl.load(f) # get current season year = dt.datetime.now().year month = dt.datetime.now().month current_season = year if month in [8,9,10,11,12] else year-1 # get schedule print('Getting schedule.\n') url = 'https://www.nbcsports.com/nfl/schedule' df = pd.read_html(url) file_path = os.path.join(pickle_directory, 'schedule.pkl') with open(file_path, 'wb') as f: pkl.dump(df, f) def get_week(): headers = { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'en-US,en;q=0.9', 'Cache-Control': 'max-age=0', 'Connection': 'keep-alive', 'Dnt': '1', 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36' } url = 'https://www.nfl.com/schedules/' resp = requests.get(url,headers=headers) soup = BeautifulSoup(resp.text, 'html.parser') h2_tags = soup.find_all('h2') year = h2_tags[0].getText().split(' ')[0] week = h2_tags[0].getText().split(' ')[-1] if week=='CARD': week = 18 return int(week), int(year) def get_lines(season): url = 'https://www.sportsbettingdime.com/nfl/public-betting-trends/' response = requests.get(url) html = BeautifulSoup(response.text) week = html.find_all('h2')[0].get_text().split(' ')[-1] df = pd.read_html(io.StringIO(response.text)) columns = list(df[0].loc[0,:].values) columns = columns[:2] + columns[3:] data_list = [] for data in df[1:-1]: data.columns = columns data['Matchup'] = data['Matchup'].str.extract('([A-Z]+)[^A-Za-z]*$') data_dict = { 'season' : season, 'week' : week, 'home_team' : data['Matchup'][1], 'away_team' : data['Matchup'][0], 'away_spread' : float(data.iloc[0,4].replace('+','')), 'money_on_away_ats' : int(data.iloc[0,5].replace('%',''))/100, 'bets_on_away_ats' : int(data.iloc[0,6].replace('%',''))/100, 'away_moneyline' : int(data['moneyline'][0].replace('+','')), 'money_on_away_ml' : int(data.iloc[0,8].replace('%',''))/100, 'bets_on_away_ml' : int(data.iloc[0,9].replace('%',''))/100, 'over_under' : data['total'].str.replace('o','').str.replace('u','').astype(float).mean(), 'money_on_over' : int(data.iloc[0,11].replace('%',''))/100, 'bets_on_over' : int(data.iloc[0,12].replace('%',''))/100 } data_list.append(data_dict) betting_data = pd.DataFrame(data_list) betting_data['key'] = [f'{season}_{week}_{away}_{home}' for season, week, away, home in betting_data[['season','week','away_team','home_team']].values] return betting_data current_week = get_week()[0] the_week = {'week':current_week, 'year':current_season} file_path = os.path.join(pickle_directory, 'the_week.pkl') with open(file_path, 'wb') as f: pkl.dump(the_week, f) # update current season build.build_gbg_data(get_seasons=[current_season]) build.add_odds_data() # get winners pbp = build.get_pbp_data([current_season]) pbp = pbp.drop_duplicates(subset='game_id') pbp[['season','week','away','home']] = pbp['game_id'].str.split('_', expand=True) games = pbp[['game_id','away_score','home_score','season','week','away','home']] games[['away_score','home_score','season','week']] = games[['away_score','home_score','season','week']].astype(int) games['away_team'] = games['away'].map(team_abbreviation_to_name) games['home_team'] = games['home'].map(team_abbreviation_to_name) games['total'] = games['away_score'] + games['home_score'] games['winner'] = [a if a_s>h_s else h if h_s>a_s else 'Tie' for a,h,a_s,h_s in games[['away_team','home_team','away_score','home_score']].values] file_path = os.path.join(data_directory, 'results.csv') games[['game_id','total','winner']].to_csv(file_path, index=False)