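"""Package a trained XGBoost model as a Hugging Face Hub repository using skops.

The script wraps a saved booster in an XGBClassifier, initializes a local skops
repo with example data, copies in the label file and this script, generates a
model card, and pushes everything to a private repo on the Hub.
"""
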
import os
import shutil
from pathlib import Path
from tempfile import mkdtemp

import xgboost
from huggingface_hub import HfApi
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from skops import card, hub_utils
from xgboost import XGBClassifier

from data_loader import get_data_splits_from_clean_data

# Paths
model_path = "xgb_model_bayse_optimization_00000.bin"
label_path = "labels.csv"



# Sanity-check data: load the breast cancer dataset and print quick summaries.
# These splits are not used for the upload; X_test is replaced with the
# project's own data further down.
X, y = load_breast_cancer(as_frame=True, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
print("X's summary: ", X.describe())
print("y's summary: ", y.describe())

# Hyperparameter grid (defined for reference only; it is not consumed below)
param_grid = {
    "max_leaf_nodes": [5, 10, 15],
    "max_depth": [2, 5, 10],
}


# Load the project's own data; the first element of the returned tuple is used
# as the example input passed to hub_utils.init below.
train_tuple = get_data_splits_from_clean_data(
    "./cifs_test_s_trained_model", label_path, simple_load=True, n_data=-1
)
print(train_tuple)
X_test = train_tuple[0]

# Load the trained booster from disk and wrap it in an XGBClassifier so that
# downstream tooling sees a scikit-learn-compatible estimator.
booster = xgboost.Booster({"nthread": 8})
booster.load_model(model_path)

model = XGBClassifier()

# Attach the loaded booster to the sklearn wrapper (private attribute).
model._Booster = booster
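# Note: only _Booster is set here; sklearn-side metadata such as classes_ is
# not populated, so the wrapper serves purely as a packaging container and is
# never used for prediction in this script.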

local_repo = mkdtemp(prefix="skops-")
hub_utils.init(
    model=model_path,
    requirements=[f"xgboost={xgboost.__version__}"],
    dst=local_repo,
    task="tabular-classification",
    data=X_test,
)
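# hub_utils.init creates local_repo with a skops config.json (recording the
# requirements, task, and a small sample of `data` as example input) and a copy
# of the model file; exact contents may vary with the skops version.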

shutil.copy(label_path, os.path.join(local_repo, label_path))
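# Copy the label file into the local repo so it is uploaded with the model.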
if "__file__" in locals():  # __file__ not defined during docs built
    # Add this script itself to the files to be uploaded for reproducibility
    hub_utils.add_files(__file__, dst=local_repo)

print(os.listdir(local_repo))
print(type(model))
print(card.metadata_from_config(Path(local_repo)))
print(type(card.metadata_from_config(Path(local_repo))))
model_card = card.Card(model, metadata=card.metadata_from_config(Path(local_repo)))
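# Optionally, descriptive sections could be added to the card before saving;
# the section names below are illustrative, not taken from the original script:
# model_card.add(**{
#     "Model description": "XGBoost classifier packaged from a pre-trained booster.",
#     "Intended uses & limitations": "Research use only.",
# })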
model_card.save(Path(local_repo) / "README.md")


# Either hard-code your own token here or (preferably) set the HF_HUB_TOKEN
# environment variable before running this script.
token = os.environ["HF_HUB_TOKEN"]

repo_name = "MLstructureMining"
user_name = HfApi().whoami(token=token)["name"]
repo_id = f"{user_name}/{repo_name}"
print(f"Creating and pushing to repo: {repo_id}")


hub_utils.push(
    repo_id=repo_id,
    source=local_repo,
    token=token,
    commit_message="pushing files to the repo from the example!",
    create_remote=True,
    private=True,
)
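# After a successful push, the private repo should contain roughly the files
# assembled in local_repo above: config.json, the model file, labels.csv, this
# script, and README.md.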