File size: 1,212 Bytes
7718235 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# Load the LogisticRegression classifier
# Note, use CV for cross-validation as requested in the question
from sklearn.ensemble import RandomForestClassifier
# Load some other sklearn functions
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn import metrics
# Import other libraries
import pandas as pd, numpy as np
import os
import yaml
import sys
# $1 is the train data file name
# $2 is the train label file name
# $3 is the test data file name
# $4 is the test data file name
X_train = pd.read_csv(f'{sys.argv[1]}', index_col=0)
y_train = pd.read_csv(f'{sys.argv[2]}', index_col=0)
y_train = y_train['score']
y_train = y_train.astype(int)
X_test = pd.read_csv(f'{sys.argv[3]}', index_col=0)
y_test = pd.read_csv(f'{sys.argv[4]}', index_col=0)
y_test = y_test['score']
y_test = y_test.astype(int)
gp_classifier = RandomForestClassifier(random_state=0).fit(X_train, y_train)
# draw fpr tpr
fpr, tpr, thresholds = metrics.roc_curve(y_test.astype(float),
gp_classifier.predict_proba(X_test)[:,1],
pos_label=1)
print("AUC={:.9f}".format(metrics.auc(fpr, tpr)))
|