import os
import shutil
from pathlib import Path
from tempfile import mkdtemp

import xgboost
from xgboost import XGBClassifier

from huggingface_hub import HfApi
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.experimental import enable_halving_search_cv  # noqa: F401
from sklearn.model_selection import HalvingGridSearchCV, train_test_split

from skops import card, hub_utils

from data_loader import get_data_splits_from_clean_data

# Paths to the pre-trained booster and its accompanying label file.
model_path = "xgb_model_bayse_optimization_00000.bin"
label_path = "labels.csv"

# Load the scikit-learn breast cancer demo data and print quick summaries.
# Note that X_test is overwritten further down with the project's own data.
X, y = load_breast_cancer(as_frame=True, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
print("X's summary: ", X.describe())
print("y's summary: ", y.describe())

# Hyperparameter grid for a halving grid search (defined but never run in
# this script; see the sketch below).
param_grid = {
    "max_leaf_nodes": [5, 10, 15],
    "max_depth": [2, 5, 10],
}
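
# The imports above pull in HalvingGridSearchCV, and both grid keys are
# HistGradientBoostingClassifier hyperparameters, so the grid was presumably
# meant to feed a search like the following (a hedged sketch, left commented
# out so the script's behavior is unchanged):
#
#     search = HalvingGridSearchCV(
#         HistGradientBoostingClassifier(random_state=42),
#         param_grid,
#         factor=3,
#     )
#     search.fit(X_train, y_train)
#     print("best params:", search.best_params_)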

# Load the project's test features with the local data_loader helper; the
# first element of the returned tuple holds the feature matrix.
train_tuple = get_data_splits_from_clean_data(
    "./cifs_test_s_trained_model", label_path, simple_load=True, n_data=-1
)
print(train_tuple)
X_test = train_tuple[0]

# Load the raw booster and wrap it in an XGBClassifier so tools that expect
# a scikit-learn estimator (such as the model card below) can consume it.
booster = xgboost.Booster({"nthread": 8})
booster.load_model(model_path)

model = XGBClassifier()
# Attach the loaded booster to the sklearn wrapper via its private attribute.
model._Booster = booster
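
# Optional sanity check (a hedged sketch: it assumes the columns of X_test
# match the features the booster was trained on):
#
#     preds = booster.predict(xgboost.DMatrix(X_test))
#     print(preds[:5])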

# Initialize a local skops repository containing the model file, its pinned
# requirements, and sample data for the Hub inference widget.
local_repo = mkdtemp(prefix="skops-")
hub_utils.init(
    model=model_path,
    requirements=[f"xgboost=={xgboost.__version__}"],
    dst=local_repo,
    task="tabular-classification",
    data=X_test,
)
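
# hub_utils.init writes a config.json into local_repo; metadata_from_config
# further down reads it back to populate the model card metadata.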

shutil.copy(label_path, os.path.join(local_repo, label_path))
# When run as a script, also upload this file itself for reproducibility.
if "__file__" in locals():
    hub_utils.add_files(__file__, dst=local_repo)

print(os.listdir(local_repo))
print(type(model))
# Build the model card from the config that hub_utils.init wrote to the repo.
metadata = card.metadata_from_config(Path(local_repo))
print(metadata)
print(type(metadata))
model_card = card.Card(model, metadata=metadata)
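
# The card is saved with metadata only. skops cards also accept free-form
# sections via Card.add (a hedged sketch; the section title and text here
# are illustrative, not part of the original script):
#
#     model_card.add(**{
#         "Model description": "XGBoost classifier for structure mining.",
#     })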

model_card.save(Path(local_repo) / "README.md")

# Authenticate against the Hugging Face Hub; HF_HUB_TOKEN must be set in the
# environment.
token = os.environ["HF_HUB_TOKEN"]

repo_name = "MLstructureMining"
user_name = HfApi().whoami(token=token)["name"]
repo_id = f"{user_name}/{repo_name}"
print(f"Creating and pushing to repo: {repo_id}")

# Push the prepared local repository, creating it as a private remote repo.
hub_utils.push(
    repo_id=repo_id,
    source=local_repo,
    token=token,
    commit_message="pushing files to the repo from the example!",
    create_remote=True,
    private=True,
)
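
# Once pushed, the booster can be fetched back from the Hub (a hedged
# sketch; reading a private repo requires a token with read access):
#
#     from huggingface_hub import hf_hub_download
#
#     local_file = hf_hub_download(repo_id=repo_id, filename=model_path, token=token)
#     restored = xgboost.Booster()
#     restored.load_model(local_file)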