Ekjaer committed on
Commit 236dae9 · verified · 1 Parent(s): d05c85b

Delete init_repo_MLstructureMining.py

Files changed (1)
  1. init_repo_MLstructureMining.py  +0 -86
init_repo_MLstructureMining.py DELETED
@@ -1,86 +0,0 @@
- import json
- import os
- import pickle
- from pathlib import Path
- from tempfile import mkdtemp, mkstemp
- from uuid import uuid4
-
- import numpy as np
- import xgboost
- from xgboost import XGBClassifier
-
- import sklearn
- from huggingface_hub import HfApi
- from sklearn.datasets import load_breast_cancer
- from sklearn.ensemble import HistGradientBoostingClassifier
- from sklearn.experimental import enable_halving_search_cv  # noqa
- from sklearn.model_selection import HalvingGridSearchCV, train_test_split
- import shutil
- from skops import card, hub_utils
- from data_loader import get_data_splits_from_clean_data
- # Paths
- model_path = "MLstructureMining_model.bin"
- label_path = "labels.csv"
- data_path = "./cifs_test_s_trained_model"
-
- train_tuple = get_data_splits_from_clean_data(
-     data_path, label_path, simple_load=True, n_data=-1
- )
- print(train_tuple)
- X_test = train_tuple[0]
-
- booster = xgboost.Booster({'nthread': 8})
- booster.load_model(model_path)
-
- model = XGBClassifier()
-
- # Set the booster
- model._Booster = booster
-
- local_repo = mkdtemp(prefix="skops-")
- hub_utils.init(
-     model=model_path,
-     requirements=[f"xgboost={xgboost.__version__}"],
-     dst=local_repo,
-     task="tabular-classification",
-     data=X_test,
- )
-
- shutil.copy(label_path, os.path.join(local_repo, label_path))
- if "__file__" in locals():  # __file__ not defined during docs built
-     # Add this script itself to the files to be uploaded for reproducibility
-     hub_utils.add_files(__file__, dst=local_repo)
-
- print(os.listdir(local_repo))
- print(type(model))
-
- card.metadata_from_config(Path(local_repo))["model_type"] = "xgboost"
- model_card = card.Card(model, metadata=card.metadata_from_config(Path(local_repo)))
- model_card.add(**{"model_type": "xgboost"})
- model_card.save(Path(local_repo) / "README.md")
- model_card.save("README.md")
-
- with open(os.path.join(local_repo, "config.json"), "r") as file:
-     data = json.load(file)
- data["model_type"] = "xgboost"
- with open(os.path.join(local_repo, "config.json"), "w") as file:
-     json.dump(data, file, indent=4)
-
- # you can put your own token here, or set it as an environment variable before
- # running this script.
- token = os.environ["HF_HUB_TOKEN"]
-
- repo_name = f"MLstructureMining"
- user_name = HfApi().whoami(token=token)["name"]
- repo_id = f"{user_name}/{repo_name}"
- print(f"Creating and pushing to repo: {repo_id}")
-
-
- hub_utils.push(
-     repo_id=repo_id,
-     source=local_repo,
-     token=token,
-     commit_message="pushing files to the repo from the example!",
-     create_remote=True,
-     private=True,
- )