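"""Package a pre-trained XGBoost model with skops and push it to the
Hugging Face Hub as a private model repository."""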
import os
import shutil
from pathlib import Path
from tempfile import mkdtemp

import xgboost
from xgboost import XGBClassifier
from huggingface_hub import HfApi
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from skops import card, hub_utils

from data_loader import get_data_splits_from_clean_data
# Paths
model_path = "xgb_model_bayse_optimization_00000.bin"
label_path = "labels.csv"
# Data: load a reference dataset for quick summary statistics; note that
# X_test is replaced further down with the project's own evaluation split
X, y = load_breast_cancer(as_frame=True, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
print("X's summary: ", X.describe())
print("y's summary: ", y.describe())
# Load the evaluation data and the pre-trained model (training happens
# elsewhere; this script only packages and uploads the result)
train_tuple = get_data_splits_from_clean_data(
    "./cifs_test_s_trained_model", label_path, simple_load=True, n_data=-1
)
print(train_tuple)
X_test = train_tuple[0]  # use the first element as the example input for the Hub repo
# Load the pre-trained booster and attach it to a scikit-learn compatible
# XGBClassifier so skops can build a model card from it. ``_Booster`` is a
# private attribute; a documented alternative is sketched below.
booster = xgboost.Booster({"nthread": 8})
booster.load_model(model_path)
model = XGBClassifier()
model._Booster = booster
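# A minimal alternative (sketch, assuming the model file was saved with a
# compatible XGBoost version): the sklearn wrapper can load it directly,
# avoiding the private attribute.
# model = XGBClassifier()
# model.load_model(model_path)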
local_repo = mkdtemp(prefix="skops-")  # temporary directory acting as the local repo
# Initialize the local repo: copy the model file in and write a config.json
# recording the task, requirements, and an example of the input data
hub_utils.init(
    model=model_path,
    requirements=[f"xgboost=={xgboost.__version__}"],
    dst=local_repo,
    task="tabular-classification",
    data=X_test,
)
shutil.copy(label_path, os.path.join(local_repo, label_path))  # ship the labels file with the model
if "__file__" in locals(): # __file__ not defined during docs built
# Add this script itself to the files to be uploaded for reproducibility
hub_utils.add_files(__file__, dst=local_repo)
# Sanity checks: list the files staged for upload and inspect the metadata
# skops inferred from the repo's config.json
print(os.listdir(local_repo))
print(type(model))
metadata = card.metadata_from_config(Path(local_repo))
print(metadata)
print(type(metadata))
# Build the model card from the inferred metadata and write it as README.md
model_card = card.Card(model, metadata=metadata)
model_card.save(Path(local_repo) / "README.md")
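# Extra sections can be added to the card before saving (sketch; the section
# names and text here are illustrative, not from the original project):
# model_card.add(**{
#     "Model description": "XGBoost classifier packaged with skops.",
#     "Model description/Intended uses & limitations": "Research use only.",
# })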
# You can hard-code your own token here, or set it as an environment
# variable before running this script.
token = os.environ["HF_HUB_TOKEN"]
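# A friendlier failure mode, if preferred (sketch): raise a clear error
# instead of a bare KeyError when the variable is unset.
# token = os.environ.get("HF_HUB_TOKEN")
# if token is None:
#     raise RuntimeError("Set the HF_HUB_TOKEN environment variable first.")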
repo_name = "MLstructureMining"
user_name = HfApi().whoami(token=token)["name"]
repo_id = f"{user_name}/{repo_name}"
print(f"Creating and pushing to repo: {repo_id}")
hub_utils.push(
    repo_id=repo_id,
    source=local_repo,
    token=token,
    commit_message="pushing files to the repo from the example!",
    create_remote=True,
    private=True,
)
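# To verify the upload, the repo can be pulled back down (sketch, assuming
# the push succeeded and the token grants read access):
# hub_utils.download(repo_id=repo_id, dst="downloaded_model", token=token)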