"""Train a classifier on the breast-cancer data and stage a skops repo.

The script fits a ``HalvingGridSearchCV`` over a
``HistGradientBoostingClassifier``, pickles the fitted search object to a
temporary file, initialises a local skops repository in a temporary
directory, writes a model card into it, and finally pushes that directory
to the Hugging Face Hub.
"""

import json
import os
import pickle
import shutil
from pathlib import Path
from tempfile import mkdtemp, mkstemp
from uuid import uuid4

import numpy as np
import xgboost
import sklearn
from huggingface_hub import HfApi
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import HistGradientBoostingClassifier
# The experimental flag must be imported before HalvingGridSearchCV.
from sklearn.experimental import enable_halving_search_cv  # noqa
from sklearn.model_selection import HalvingGridSearchCV, train_test_split

from skops import card, hub_utils

# Data
X, y = load_breast_cancer(as_frame=True, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
print("X's summary: ", X.describe())
print("y's summary: ", y.describe())

# Train model
param_grid = {
    "max_leaf_nodes": [5, 10, 15],
    "max_depth": [2, 5, 10],
}
model = HalvingGridSearchCV(
    estimator=HistGradientBoostingClassifier(),
    param_grid=param_grid,
    random_state=42,
    n_jobs=-1,
).fit(X_train, y_train)
# The returned score is not stored or printed.
model.score(X_test, y_test)

# The file name is not significant, here we choose to save it with a `pkl`
# extension.
# mkstemp() hands back an already-open OS-level descriptor; write through it
# so the descriptor is closed by the `with` block instead of being leaked.
pkl_fd, pkl_name = mkstemp(prefix="skops-", suffix=".pkl")
with os.fdopen(pkl_fd, mode="wb") as f:
    pickle.dump(model, file=f)

local_repo = mkdtemp(prefix="skops-")
# NOTE(review): the repo is initialised from a hard-coded XGBoost file that is
# resolved relative to the current working directory, not from the pickle
# written above (pass `model=pkl_name` to upload the estimator trained here).
# The model card below is still built from the scikit-learn `model` object --
# confirm that this combination is intended.
hub_utils.init(
    model="xgb_model_bayse_optimization_00000.bin",
    requirements=[
        f"scikit-learn={sklearn.__version__}",
        f"xgboost={xgboost.__version__}",
    ],
    dst=local_repo,
    task="tabular-classification",
    data=X_test,
)

if "__file__" in locals():  # __file__ not defined during docs built
    # Add this script itself to the files to be uploaded for reproducibility
    hub_utils.add_files(__file__, dst=local_repo)

print(os.listdir(local_repo))

# Build the card from the fitted search object plus the metadata that
# hub_utils.init() stored in the local repo, and save it as the README.
model_card = card.Card(model, metadata=card.metadata_from_config(Path(local_repo)))
model_card.save(Path(local_repo) / "README.md")

# you can put your own token here, or set it as an environment variable before
# running this script.
# The Hub access token comes from the HF_HUB_TOKEN environment variable; the
# lookup raises KeyError when the variable is not set.
token = os.environ["HF_HUB_TOKEN"]
repo_name = "MLstructureMining"

# Ask the Hub which account owns the token and use its name as the namespace.
account_info = HfApi().whoami(token=token)
user_name = account_info["name"]
repo_id = f"{user_name}/{repo_name}"
print(f"Creating and pushing to repo: {repo_id}")

# Upload the contents of the local repo directory in a single push call.
push_kwargs = {
    "repo_id": repo_id,
    "source": local_repo,
    "token": token,
    "commit_message": "pushing files to the repo from the example!",
    "create_remote": True,
    "private": True,
}
hub_utils.push(**push_kwargs)