MLstructureMining / init_repo_MLstructureMining.py
Ekjaer's picture
pushing files to the repo from the example!
8f51864
raw
history blame
2.58 kB
import json
import os
import pickle
from pathlib import Path
from tempfile import mkdtemp, mkstemp
from uuid import uuid4
import numpy as np
import xgboost
from xgboost import XGBClassifier
import sklearn
from huggingface_hub import HfApi
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.experimental import enable_halving_search_cv # noqa
from sklearn.model_selection import HalvingGridSearchCV, train_test_split
import shutil
from skops import card, hub_utils
from data_loader import get_data_splits_from_clean_data
# Paths
# Pre-trained XGBoost booster binary and the CSV mapping class indices to labels.
model_path = "xgb_model_bayse_optimization_00000.bin"
label_path = "labels.csv"
# Data
# NOTE(review): this breast-cancer load/split looks like a leftover from the
# skops example this script was adapted from -- X_train, y_train and y_test are
# never used below, and X_test is overwritten once the project data is loaded.
# Confirm before removing.
X, y = load_breast_cancer(as_frame=True, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
print("X's summary: ", X.describe())
print("y's summary: ", y.describe())
# # Train model
# NOTE(review): param_grid is never used in this script (no HalvingGridSearchCV
# is ever constructed) -- presumably dead code from the original example; verify
# and remove.
param_grid = {
    "max_leaf_nodes": [5, 10, 15],
    "max_depth": [2, 5, 10],
}
# Load the project's own data; the semantics of simple_load and n_data=-1 are
# defined in data_loader.get_data_splits_from_clean_data (not visible here).
train_tuple = get_data_splits_from_clean_data(
    "./cifs_test_s_trained_model", label_path, simple_load=True, n_data=-1
)
print(train_tuple)
# Replace the sklearn example data with the project data for the hub upload.
# Assumes the first tuple element is the feature matrix -- TODO confirm against
# data_loader.
X_test = train_tuple[0]
# Load the raw booster from disk and wrap it in a scikit-learn-compatible
# XGBClassifier so skops can treat it as a tabular classifier.
booster = xgboost.Booster({'nthread': 8})  # use 8 threads for prediction
booster.load_model(model_path)
model = XGBClassifier()
# Set the booster
# HACK: _Booster is a private XGBClassifier attribute; assigning it directly
# bypasses fit() and may break across xgboost versions -- verify on upgrade.
model._Booster = booster
# Initialise a local skops repository in a temp directory with the model
# binary, its pip requirements, the task type, and sample data (skops uses the
# data to build the model-card / hub widget metadata).
local_repo = mkdtemp(prefix="skops-")
hub_utils.init(
    model=model_path,
    # Pin the exact xgboost version. PEP 508 requirement syntax needs "==";
    # the original single "=" is not a valid version specifier and would be
    # rejected by pip when installing from the generated requirements.
    requirements=[f"xgboost=={xgboost.__version__}"],
    dst=local_repo,
    task="tabular-classification",
    data=X_test,
)
# Ship the label mapping alongside the model so downstream users can decode
# predicted class indices.
shutil.copy(label_path, os.path.join(local_repo, label_path))
# Bundle this script itself with the upload, then render a model card
# (README.md) from the metadata skops wrote into the local repo.
if "__file__" in locals():  # __file__ not defined during docs built
    # Add this script itself to the files to be uploaded for reproducibility
    hub_utils.add_files(__file__, dst=local_repo)
# Debug output: repo contents and the metadata skops derived from config.
print(os.listdir(local_repo))
print(type(model))
print(card.metadata_from_config(Path(local_repo)))
print(type(card.metadata_from_config(Path(local_repo))))
model_card = card.Card(model, metadata=card.metadata_from_config(Path(local_repo)))
model_card.save(Path(local_repo) / "README.md")
# you can put your own token here, or set it as an environment variable before
# running this script.
# Raises KeyError naming the variable if HF_HUB_TOKEN is unset.
token = os.environ["HF_HUB_TOKEN"]
repo_name = "MLstructureMining"  # plain literal: the f-prefix had no placeholders
user_name = HfApi().whoami(token=token)["name"]
repo_id = f"{user_name}/{repo_name}"
print(f"Creating and pushing to repo: {repo_id}")
# Create the remote repo if it does not exist and push the local skops repo
# as a private model repository on the Hugging Face Hub.
hub_utils.push(
    repo_id=repo_id,
    source=local_repo,
    token=token,
    commit_message="pushing files to the repo from the example!",
    create_remote=True,
    private=True,
)