MLstructureMining / init_repo_MLstructureMining.py
Ekjaer's picture
pushing files to the repo from the example!
8f51864
raw
history blame
2.58 kB
import json
import os
import pickle
from pathlib import Path
from tempfile import mkdtemp, mkstemp
from uuid import uuid4
import numpy as np
import xgboost
from xgboost import XGBClassifier
import sklearn
from huggingface_hub import HfApi
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.experimental import enable_halving_search_cv # noqa
from sklearn.model_selection import HalvingGridSearchCV, train_test_split
import shutil
from skops import card, hub_utils
from data_loader import get_data_splits_from_clean_data
# Paths
# Pre-trained XGBoost booster binary and the CSV mapping class indices to labels.
model_path = "xgb_model_bayse_optimization_00000.bin"
label_path = "labels.csv"
# Data
# NOTE(review): this breast-cancer load/split looks like a leftover from the
# skops example this script was adapted from -- X_train, y_train and y_test are
# never used below, and X_test is overwritten once the project data is loaded.
# Confirm before removing.
X, y = load_breast_cancer(as_frame=True, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
print("X's summary: ", X.describe())
print("y's summary: ", y.describe())
# # Train model
# NOTE(review): param_grid is never used in this script (no HalvingGridSearchCV
# is ever constructed) -- presumably dead code from the original example; verify
# and remove.
param_grid = {
    "max_leaf_nodes": [5, 10, 15],
    "max_depth": [2, 5, 10],
}
# Load the project's own data; the semantics of simple_load and n_data=-1 are
# defined in data_loader.get_data_splits_from_clean_data (not visible here).
train_tuple = get_data_splits_from_clean_data(
    "./cifs_test_s_trained_model", label_path, simple_load=True, n_data=-1
)
print(train_tuple)
# Replace the sklearn example data with the project data for the hub upload.
# Assumes the first tuple element is the feature matrix -- TODO confirm against
# data_loader.
X_test = train_tuple[0]
# Load the raw booster from disk and wrap it in a scikit-learn-compatible
# XGBClassifier so skops can treat it as a tabular classifier.
booster = xgboost.Booster({'nthread': 8})  # use 8 threads for prediction
booster.load_model(model_path)
model = XGBClassifier()
# Set the booster
# HACK: _Booster is a private XGBClassifier attribute; assigning it directly
# bypasses fit() and may break across xgboost versions -- verify on upgrade.
model._Booster = booster
# Initialise a local skops repository in a temp directory with the model
# binary, its pip requirements, the task type, and sample data (skops uses the
# data to build the model-card / hub widget metadata).
local_repo = mkdtemp(prefix="skops-")
hub_utils.init(
    model=model_path,
    # Pin the exact xgboost version. PEP 508 requirement syntax needs "==";
    # the original single "=" is not a valid version specifier and would be
    # rejected by pip when installing from the generated requirements.
    requirements=[f"xgboost=={xgboost.__version__}"],
    dst=local_repo,
    task="tabular-classification",
    data=X_test,
)
# Ship the label mapping alongside the model so downstream users can decode
# predicted class indices.
shutil.copy(label_path, os.path.join(local_repo, label_path))
# Bundle this script itself with the upload, then render a model card
# (README.md) from the metadata skops wrote into the local repo.
if "__file__" in locals():  # __file__ not defined during docs built
    # Add this script itself to the files to be uploaded for reproducibility
    hub_utils.add_files(__file__, dst=local_repo)
# Debug output: repo contents and the metadata skops derived from config.
print(os.listdir(local_repo))
print(type(model))
print(card.metadata_from_config(Path(local_repo)))
print(type(card.metadata_from_config(Path(local_repo))))
model_card = card.Card(model, metadata=card.metadata_from_config(Path(local_repo)))
model_card.save(Path(local_repo) / "README.md")
# you can put your own token here, or set it as an environment variable before
# running this script.
# Raises KeyError naming the variable if HF_HUB_TOKEN is unset.
token = os.environ["HF_HUB_TOKEN"]
repo_name = "MLstructureMining"  # plain literal: the f-prefix had no placeholders
user_name = HfApi().whoami(token=token)["name"]
repo_id = f"{user_name}/{repo_name}"
print(f"Creating and pushing to repo: {repo_id}")
# Create the remote repo if it does not exist and push the local skops repo
# as a private model repository on the Hugging Face Hub.
hub_utils.push(
    repo_id=repo_id,
    source=local_repo,
    token=token,
    commit_message="pushing files to the repo from the example!",
    create_remote=True,
    private=True,
)