|
import json |
|
import os |
|
import pickle |
|
from pathlib import Path |
|
from tempfile import mkdtemp, mkstemp |
|
from uuid import uuid4 |
|
|
|
import numpy as np |
|
import xgboost |
|
import sklearn |
|
from huggingface_hub import HfApi |
|
from sklearn.datasets import load_breast_cancer |
|
from sklearn.ensemble import HistGradientBoostingClassifier |
|
from sklearn.experimental import enable_halving_search_cv |
|
from sklearn.model_selection import HalvingGridSearchCV, train_test_split |
|
import shutil |
|
from skops import card, hub_utils |
|
|
|
|
|
# Load the breast-cancer dataset as pandas objects (features in ``X``,
# target in ``y``).
X, y = load_breast_cancer(return_X_y=True, as_frame=True)

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

# Print descriptive statistics for the features and the target.
print("X's summary: ", X.describe())
print("y's summary: ", y.describe())
|
|
|
|
|
# Hyper-parameter candidates explored by the successive-halving search.
param_grid = dict(
    max_leaf_nodes=[5, 10, 15],
    max_depth=[2, 5, 10],
)

# Build the search object first, then fit it; ``fit`` returns the estimator
# itself, so ``model`` ends up bound to the fitted search either way.
model = HalvingGridSearchCV(
    estimator=HistGradientBoostingClassifier(),
    param_grid=param_grid,
    n_jobs=-1,
    random_state=42,
)
model.fit(X_train, y_train)

# Evaluate on the held-out split. The returned score is not stored or printed
# here; it is only visible when run interactively.
model.score(X_test, y_test)
|
|
|
|
|
# Serialise the fitted model to a temporary pickle file.
#
# ``mkstemp`` returns an already-open OS-level file descriptor together with
# the path. Wrapping that descriptor with ``os.fdopen`` lets the ``with`` block
# close it, instead of discarding it (and leaking it) while the path is opened
# a second time. The file itself is not deleted: ``pkl_name`` stays valid for
# later use.
pkl_fd, pkl_name = mkstemp(prefix="skops-", suffix=".pkl")
with os.fdopen(pkl_fd, mode="wb") as f:
    pickle.dump(model, file=f)
|
|
|
|
|
|
|
# Create a fresh temporary directory that will hold the local copy of the repo.
local_repo = mkdtemp(prefix="skops-")

# Package requirements recorded for the repo, using the installed versions.
pinned_requirements = [
    f"scikit-learn={sklearn.__version__}",
    f"xgboost={xgboost.__version__}",
]

# NOTE(review): the model path is a hard-coded file name, not the pickle
# written to ``pkl_name`` earlier in this script — confirm that this file
# exists in the working directory and is the artefact meant to be uploaded.
hub_utils.init(
    dst=local_repo,
    model="xgb_model_bayse_optimization_00000.bin",
    task="tabular-classification",
    requirements=pinned_requirements,
    data=X_test,
)

# ``__file__`` is only looked up when it is defined in the current namespace,
# so the script is bundled into the repo only in that case.
if "__file__" in locals():
    hub_utils.add_files(__file__, dst=local_repo)

# Show what ended up in the local repo directory.
print(os.listdir(local_repo))
|
|
|
# Build a model card from the metadata derived from the local repo's config
# and store it as the repo's README.
repo_path = Path(local_repo)
card_metadata = card.metadata_from_config(repo_path)
model_card = card.Card(model, metadata=card_metadata)
model_card.save(repo_path / "README.md")
|
|
|
|
|
|
|
|
|
# The access token is taken from the environment; a missing HF_HUB_TOKEN
# variable raises KeyError at this point.
token = os.environ["HF_HUB_TOKEN"]

# The target repo id is "<account name>/<repo name>", where the account name
# is looked up from the token.
repo_name = "MLstructureMining"
account_info = HfApi().whoami(token=token)
user_name = account_info["name"]
repo_id = f"{user_name}/{repo_name}"

print(f"Creating and pushing to repo: {repo_id}")
|
|
|
|
|
# Upload the contents of the local repo directory to the Hub.
hub_utils.push(
    # What to push and where to push it.
    source=local_repo,
    repo_id=repo_id,
    token=token,
    # Create the remote repo as part of the push, and create it as private.
    create_remote=True,
    private=True,
    commit_message="pushing files to the repo from the example!",
)