rapadilla committed on
Commit
34e8fb9
·
1 Parent(s): 65cfe9e

Replacing deprecated Repository with git

Browse files
Files changed (1) hide show
  1. init.py +40 -29
init.py CHANGED
@@ -1,48 +1,51 @@
1
- import os
2
  from constants import EVAL_REQUESTS_PATH
3
  from pathlib import Path
4
- from huggingface_hub import HfApi, Repository
5
  from dotenv import load_dotenv
 
 
6
 
7
  load_dotenv()
 
 
8
  TOKEN_HUB = os.environ.get("TOKEN_HUB_V2", None)
 
9
  QUEUE_REPO = os.environ.get("QUEUE_REPO", None)
 
10
  QUEUE_PATH = os.environ.get("QUEUE_PATH", None)
11
 
12
  hf_api = HfApi(
13
- endpoint="https://huggingface.co",
14
- token=TOKEN_HUB,
15
  )
16
 
 
17
  def load_all_info_from_dataset_hub():
18
  eval_queue_repo = None
19
- results_csv_path = None
20
  requested_models = None
21
 
22
- passed = True
23
  if TOKEN_HUB is None:
24
- passed = False
 
 
 
25
  else:
26
  print("Pulling evaluation requests and results.")
27
 
28
- eval_queue_repo = Repository(
29
- local_dir=QUEUE_PATH,
30
- clone_from=QUEUE_REPO,
31
- use_auth_token=TOKEN_HUB,
32
- repo_type="dataset",
33
  )
34
- eval_queue_repo.git_pull()
35
-
36
  # Local directory where dataset repo is cloned + folder with eval requests
37
  directory = QUEUE_PATH / EVAL_REQUESTS_PATH
38
  requested_models = get_all_requested_models(directory)
39
  requested_models = [p.stem for p in requested_models]
40
  # Local directory where dataset repo is cloned
41
  csv_results = get_csv_with_results(QUEUE_PATH)
42
- if csv_results is None:
43
- passed = False
44
- if not passed:
45
- print("No HuggingFace token provided. Skipping evaluation requests and results.")
46
 
47
  return eval_queue_repo, requested_models, csv_results
48
 
@@ -51,18 +54,21 @@ def upload_file(requested_model_name, path_or_fileobj):
51
  dest_repo_file = Path(EVAL_REQUESTS_PATH) / path_or_fileobj.name
52
  dest_repo_file = str(dest_repo_file)
53
  hf_api.upload_file(
54
- path_or_fileobj=path_or_fileobj,
55
- path_in_repo=str(dest_repo_file),
56
- repo_id=QUEUE_REPO,
57
- token=TOKEN_HUB,
58
- repo_type="dataset",
59
- commit_message=f"Add {requested_model_name} to eval queue")
 
 
60
 
61
  def get_all_requested_models(directory):
62
  directory = Path(directory)
63
  all_requested_models = list(directory.glob("*.txt"))
64
  return all_requested_models
65
 
 
66
  def get_csv_with_results(directory):
67
  directory = Path(directory)
68
  all_csv_files = list(directory.glob("*.csv"))
@@ -72,16 +78,21 @@ def get_csv_with_results(directory):
72
  return latest[0]
73
 
74
 
75
-
76
  def is_model_on_hub(model_name, revision="main") -> bool:
77
  try:
78
- model_name = model_name.replace(" ","")
79
  author = model_name.split("/")[0]
80
  model_id = model_name.split("/")[1]
81
  if len(author) == 0 or len(model_id) == 0:
82
- return False, "is not a valid model name. Please use the format `author/model_name`."
83
- except Exception as e:
84
- return False, "is not a valid model name. Please use the format `author/model_name`."
 
 
 
 
 
 
85
 
86
  try:
87
  models = list(hf_api.list_models(author=author, search=model_id))
 
 
1
  from constants import EVAL_REQUESTS_PATH
2
  from pathlib import Path
3
+ from huggingface_hub import HfApi
4
  from dotenv import load_dotenv
5
+ import git
6
+ import os
7
 
8
  load_dotenv()
9
+
10
+ # Hub to access the dataset repo
11
  TOKEN_HUB = os.environ.get("TOKEN_HUB_V2", None)
12
+ # Name of the repo where the dataset is stored user/repo_name
13
  QUEUE_REPO = os.environ.get("QUEUE_REPO", None)
14
+ # Local path where the repo is cloned to
15
  QUEUE_PATH = os.environ.get("QUEUE_PATH", None)
16
 
17
  hf_api = HfApi(
18
+ endpoint="https://huggingface.co",
19
+ token=TOKEN_HUB,
20
  )
21
 
22
+
23
  def load_all_info_from_dataset_hub():
24
  eval_queue_repo = None
25
+ csv_results = None
26
  requested_models = None
27
 
 
28
  if TOKEN_HUB is None:
29
+ print(
30
+ "No HuggingFace token provided. Skipping evaluation requests and results."
31
+ )
32
+ return eval_queue_repo, requested_models, csv_results
33
  else:
34
  print("Pulling evaluation requests and results.")
35
 
36
+ # Pull the dataset repo
37
+ user_name = QUEUE_REPO.split("/")[0]
38
+ repo_url = (
39
+ f"https://{user_name}:{TOKEN_HUB}@huggingface.co/datasets/{QUEUE_REPO}"
 
40
  )
41
+ git.Repo.clone_from(repo_url, QUEUE_PATH)
42
+
43
  # Local directory where dataset repo is cloned + folder with eval requests
44
  directory = QUEUE_PATH / EVAL_REQUESTS_PATH
45
  requested_models = get_all_requested_models(directory)
46
  requested_models = [p.stem for p in requested_models]
47
  # Local directory where dataset repo is cloned
48
  csv_results = get_csv_with_results(QUEUE_PATH)
 
 
 
 
49
 
50
  return eval_queue_repo, requested_models, csv_results
51
 
 
54
  dest_repo_file = Path(EVAL_REQUESTS_PATH) / path_or_fileobj.name
55
  dest_repo_file = str(dest_repo_file)
56
  hf_api.upload_file(
57
+ path_or_fileobj=path_or_fileobj,
58
+ path_in_repo=str(dest_repo_file),
59
+ repo_id=QUEUE_REPO,
60
+ token=TOKEN_HUB,
61
+ repo_type="dataset",
62
+ commit_message=f"Add {requested_model_name} to eval queue",
63
+ )
64
+
65
 
66
  def get_all_requested_models(directory):
67
  directory = Path(directory)
68
  all_requested_models = list(directory.glob("*.txt"))
69
  return all_requested_models
70
 
71
+
72
  def get_csv_with_results(directory):
73
  directory = Path(directory)
74
  all_csv_files = list(directory.glob("*.csv"))
 
78
  return latest[0]
79
 
80
 
 
81
  def is_model_on_hub(model_name, revision="main") -> bool:
82
  try:
83
+ model_name = model_name.replace(" ", "")
84
  author = model_name.split("/")[0]
85
  model_id = model_name.split("/")[1]
86
  if len(author) == 0 or len(model_id) == 0:
87
+ return (
88
+ False,
89
+ "is not a valid model name. Please use the format `author/model_name`.",
90
+ )
91
+ except Exception:
92
+ return (
93
+ False,
94
+ "is not a valid model name. Please use the format `author/model_name`.",
95
+ )
96
 
97
  try:
98
  models = list(hf_api.list_models(author=author, search=model_id))