import json |
import os |
import pickle |
from pathlib import Path |
from tempfile import mkdtemp, mkstemp |
from uuid import uuid4 |
import numpy as np |
import xgboost |
import sklearn |
from huggingface_hub import HfApi |
from sklearn.datasets import load_breast_cancer |
from sklearn.ensemble import HistGradientBoostingClassifier |
from sklearn.experimental import enable_halving_search_cv |
from sklearn.model_selection import HalvingGridSearchCV, train_test_split |
import shutil |
from skops import card, hub_utils |
# Load the breast-cancer dataset as pandas objects: X holds the feature
# columns and y the target.
X, y = load_breast_cancer(return_X_y=True, as_frame=True)

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
splits = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = splits

# Print descriptive statistics for the features and for the target.
print("X's summary: ", X.describe())
print("y's summary: ", y.describe())
# Hyperparameter candidates explored by the successive-halving grid search.
param_grid = {
    "max_leaf_nodes": [5, 10, 15],
    "max_depth": [2, 5, 10],
}

# Configure the search, then fit it on the training split. The fitted search
# object itself is what the rest of the script refers to as ``model``.
model = HalvingGridSearchCV(
    estimator=HistGradientBoostingClassifier(),
    param_grid=param_grid,
    random_state=42,
    n_jobs=-1,
)
model.fit(X_train, y_train)

# Score on the held-out split. The return value is neither stored nor
# printed here, so it is only visible when run in an interactive session.
model.score(X_test, y_test)
# Persist the fitted model as a pickle in a fresh temporary file.
# mkstemp() returns an already-open OS-level file descriptor together with
# the path. Wrap that descriptor instead of discarding it and reopening the
# path, so the descriptor is closed when the ``with`` block exits and is not
# leaked for the lifetime of the process.
pkl_fd, pkl_name = mkstemp(prefix="skops-", suffix=".pkl")
with os.fdopen(pkl_fd, mode="wb") as f:
    pickle.dump(model, file=f)
# Create a temporary directory that serves as the local copy of the Hub
# repository.
local_repo = mkdtemp(prefix="skops-")

# Package versions recorded as the repository's requirements, taken from the
# packages installed in the current environment.
pinned_requirements = [
    f"scikit-learn={sklearn.__version__}",
    f"xgboost={xgboost.__version__}",
]

# NOTE(review): the model path below is a hard-coded relative file name, not
# the ``pkl_name`` pickle written earlier in this script -- confirm that this
# is the file that is meant to be uploaded and that it exists in the working
# directory.
hub_utils.init(
    model="xgb_model_bayse_optimization_00000.bin",
    requirements=pinned_requirements,
    dst=local_repo,
    task="tabular-classification",
    data=X_test,
)

# When executed from a file (``__file__`` is defined), add this script to the
# local repository as well.
if "__file__" in locals():
    hub_utils.add_files(__file__, dst=local_repo)

# Show what ended up in the local repository.
print(os.listdir(local_repo))
# Build the model card for the fitted model, taking its metadata from the
# configuration in the local repository, and save it there as README.md.
repo_path = Path(local_repo)
card_metadata = card.metadata_from_config(repo_path)
model_card = card.Card(model, metadata=card_metadata)
model_card.save(repo_path / "README.md")
# Read the Hugging Face access token from the environment. A missing
# HF_HUB_TOKEN variable raises KeyError here, before any network call is made.
token = os.environ["HF_HUB_TOKEN"]

# The target repository is "<user>/<repo_name>", where the user name is
# looked up from the account the token belongs to.
repo_name = "MLstructureMining"
user_name = HfApi().whoami(token=token)["name"]
repo_id = f"{user_name}/{repo_name}"
print(f"Creating and pushing to repo: {repo_id}")

# Upload the contents of the local repository directory. The call asks for
# the remote repository to be created (create_remote=True) and to be private
# (private=True).
hub_utils.push(
    repo_id=repo_id,
    source=local_repo,
    token=token,
    commit_message="pushing files to the repo from the example!",
    create_remote=True,
    private=True,
)