File size: 2,404 Bytes
4774d56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import xgboost as xgb
import pandas as pd
import pickle as pkl
import numpy as np
from tqdm import tqdm
from IPython.display import clear_output
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import os

# Resolve the project folders relative to this script: the input CSV is read
# from <parent>/Data, while <parent>/Models and <parent>/Pickles receive the
# outputs written further down.
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory, model_directory, pickle_directory = (
    os.path.join(parent_directory, folder_name)
    for folder_name in ('Data', 'Models', 'Pickles')
)

# Load the table and discard every row that has at least one missing value.
file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
data = pd.read_csv(file_path)
data = data.dropna()

# The label is the 'Home-Team-Win' column. It is captured before the column is
# removed from the feature table below.
margin = data['Home-Team-Win']

# Columns removed from the feature table: the label itself plus descriptive,
# score, odds and winnings columns. 'game_id' is intentionally kept here
# because the training loop records it per split before dropping it.
non_feature_columns = [
    'Home-Team-Win',
    'Over',
    'Season',
    'home_team',
    'away_team',
    'game_date',
    'Key',
    'Home Score',
    'Away Score',
    'Home Odds Close',
    'Away Odds Close',
    'Home Winnings',
    'Away Winnings',
    'Home Odds',
    'Away Odds',
]
data.drop(columns=non_feature_columns, inplace=True)

# Accuracy (in percent) of each training run, in the order the runs happen.
acc_results = []

# Repeatedly train on a fresh random 90/10 split and keep the artefacts of the
# run with the highest test accuracy.

# The hyper-parameters and the number of boosting rounds are identical for
# every run, so they are built once instead of on every iteration.
param = {
    'max_depth': 2,
    'eta': 0.01,
    'objective': 'multi:softprob',
    'num_class': 2
}
epochs = 500

# Ensure the output folders exist so the first save cannot fail on a fresh
# checkout where they have not been created yet.
os.makedirs(pickle_directory, exist_ok=True)
os.makedirs(model_directory, exist_ok=True)

for _ in tqdm(range(100)):
    X_train, X_test, y_train, y_test = train_test_split(data, margin, test_size=.1)

    # Remember which games ended up on each side of the split.
    train_games = X_train['game_id']
    test_games = X_test['game_id']

    # Use a non-inplace drop: the split frames are derived from `data`, and
    # mutating them in place can raise pandas' SettingWithCopyWarning.
    X_train = X_train.drop(columns=['game_id'])
    X_test = X_test.drop(columns=['game_id'])

    train = xgb.DMatrix(X_train.astype(float).values, label=y_train)
    test = xgb.DMatrix(X_test.astype(float).values, label=y_test)

    model = xgb.train(param, train, epochs)
    predictions = model.predict(test)
    # Each prediction row holds one probability per class; the predicted
    # class is the index of the largest one.
    y = np.argmax(predictions, axis=1)

    acc = round(accuracy_score(y_test, y)*100, 1)
    acc_results.append(acc)
    best_acc = max(acc_results)
    clear_output(wait=True)
    print(f"Best accuracy: {best_acc}%")

    # Only save results if they are the best so far. A tie with the current
    # best also counts, so the latest split reaching that accuracy wins.
    # The two pickles use fixed names and are overwritten, whereas the model
    # file name embeds the accuracy, so models from earlier, lower bests
    # remain on disk.
    if acc == best_acc:
        train_games_path = os.path.join(pickle_directory, 'train_games_ML_no_odds.pkl')
        with open(train_games_path, 'wb') as f:
            pkl.dump(train_games, f)

        test_games_path = os.path.join(pickle_directory, 'test_games_ML_no_odds.pkl')
        with open(test_games_path, 'wb') as f:
            pkl.dump(test_games, f)

        model_path = os.path.join(model_directory, f'xgboost_ML_no_odds_{acc}%.json')
        model.save_model(model_path)

print('Done')