ChandimaPrabath committed on
Commit
4a021bd
·
1 Parent(s): fdc1154

0.0.2.9 V Beta

Browse files
Files changed (4) hide show
  1. LoadBalancer.py +1 -1
  2. hf_scrapper.py +22 -22
  3. indexer.py +30 -17
  4. requirements.txt +2 -0
LoadBalancer.py CHANGED
@@ -14,7 +14,7 @@ download_progress = {}
14
 
15
  class LoadBalancer:
16
  def __init__(self, cache_dir, index_file, token, repo, polling_interval=4, max_retries=3, initial_delay=1):
17
- self.version = "0.0.2.7 V Beta"
18
  self.instances = []
19
  self.instances_health = {}
20
  self.polling_interval = polling_interval
 
14
 
15
  class LoadBalancer:
16
  def __init__(self, cache_dir, index_file, token, repo, polling_interval=4, max_retries=3, initial_delay=1):
17
+ self.version = "0.0.2.9 V Beta"
18
  self.instances = []
19
  self.instances_health = {}
20
  self.polling_interval = polling_interval
hf_scrapper.py CHANGED
@@ -1,17 +1,19 @@
1
  import os
2
- import requests
3
  import json
 
 
 
4
  import urllib.request
5
- import time
6
- from requests.exceptions import RequestException
7
- from tqdm import tqdm
8
 
9
  CACHE_DIR = os.getenv("CACHE_DIR")
10
  CACHE_JSON_PATH = os.path.join(CACHE_DIR, "cached_films.json")
11
 
12
  download_progress = {}
13
 
14
- def get_system_proxies():
15
  """
16
  Retrieves the system's HTTP and HTTPS proxies.
17
 
@@ -29,7 +31,7 @@ def get_system_proxies():
29
  print(f"Error getting system proxies: {e}")
30
  return {}
31
 
32
- def get_file_structure(repo, token, path="", proxies=None):
33
  """
34
  Fetches the file structure of a specified Hugging Face repository.
35
 
@@ -44,16 +46,18 @@ def get_file_structure(repo, token, path="", proxies=None):
44
  """
45
  api_url = f"https://huggingface.co/api/models/{repo}/tree/main/{path}"
46
  headers = {'Authorization': f'Bearer {token}'}
47
- print(f"Fetching file structure from URL: {api_url} with proxies: {proxies}")
48
- try:
49
- response = requests.get(api_url, headers=headers, proxies=proxies)
50
- response.raise_for_status()
51
- return response.json()
52
- except RequestException as e:
53
- print(f"Error fetching file structure: {e}")
54
- return []
 
 
55
 
56
- def write_file_structure_to_json(file_structure, file_path):
57
  """
58
  Writes the file structure to a JSON file.
59
 
@@ -62,12 +66,8 @@ def write_file_structure_to_json(file_structure, file_path):
62
  file_path (str): The path where the JSON file will be saved.
63
  """
64
  try:
65
- with open(file_path, 'w') as json_file:
66
- json.dump(file_structure, json_file, indent=2)
67
  print(f'File structure written to {file_path}')
68
  except IOError as e:
69
- print(f"Error writing file structure to JSON: {e}")
70
-
71
- if __name__ == "__main__":
72
- file_url = "https://huggingface.co/Unicone-Studio/jellyfin_media/resolve/main/films/Funky%20Monkey%202004/Funky%20Monkey%20(2004)%20Web-dl%201080p.mp4"
73
- token = os.getenv("TOKEN")
 
1
  import os
 
2
  import json
3
+ import aiohttp
4
+ import aiofiles
5
+ import asyncio
6
  import urllib.request
7
+ from aiohttp import ClientSession, ClientTimeout
8
+ from aiohttp.client_exceptions import ClientError
9
+ from tqdm.asyncio import tqdm
10
 
11
  CACHE_DIR = os.getenv("CACHE_DIR")
12
  CACHE_JSON_PATH = os.path.join(CACHE_DIR, "cached_films.json")
13
 
14
  download_progress = {}
15
 
16
+ async def get_system_proxies():
17
  """
18
  Retrieves the system's HTTP and HTTPS proxies.
19
 
 
31
  print(f"Error getting system proxies: {e}")
32
  return {}
33
 
34
+ async def get_file_structure(repo, token, path="", proxies=None):
35
  """
36
  Fetches the file structure of a specified Hugging Face repository.
37
 
 
46
  """
47
  api_url = f"https://huggingface.co/api/models/{repo}/tree/main/{path}"
48
  headers = {'Authorization': f'Bearer {token}'}
49
+ timeout = ClientTimeout(total=10)
50
+ async with ClientSession(timeout=timeout) as session:
51
+ print(f"Fetching file structure from URL: {api_url} with proxies: {proxies}")
52
+ try:
53
+ async with session.get(api_url, headers=headers, proxy=proxies.get("http")) as response:
54
+ response.raise_for_status()
55
+ return await response.json()
56
+ except ClientError as e:
57
+ print(f"Error fetching file structure: {e}")
58
+ return []
59
 
60
+ async def write_file_structure_to_json(file_structure, file_path):
61
  """
62
  Writes the file structure to a JSON file.
63
 
 
66
  file_path (str): The path where the JSON file will be saved.
67
  """
68
  try:
69
+ async with aiofiles.open(file_path, 'w') as json_file:
70
+ await json_file.write(json.dumps(file_structure, indent=2))
71
  print(f'File structure written to {file_path}')
72
  except IOError as e:
73
+ print(f"Error writing file structure to JSON: {e}")
 
 
 
 
indexer.py CHANGED
@@ -1,4 +1,5 @@
1
  import json
 
2
  from hf_scrapper import get_system_proxies, get_file_structure, write_file_structure_to_json
3
  from dotenv import load_dotenv
4
  import os
@@ -6,26 +7,38 @@ import os
6
  load_dotenv()
7
 
8
  def index_repository(token, repo, current_path="", proxies=None):
9
- file_structure = get_file_structure(repo, token, current_path, proxies)
10
- full_structure = []
11
- for item in file_structure:
12
- if item['type'] == 'directory':
13
- sub_directory_structure = index_repository(token, repo, item['path'], proxies)
14
- full_structure.append({
15
- "type": "directory",
16
- "path": item['path'],
17
- "contents": sub_directory_structure
18
- })
19
- else:
20
- full_structure.append(item)
21
- return full_structure
 
 
 
 
22
 
23
  def indexer():
24
  token = os.getenv("TOKEN")
25
  repo = os.getenv("REPO")
26
  output_path = os.getenv("INDEX_FILE")
 
 
 
 
 
27
  proxies = get_system_proxies()
28
- full_structure = index_repository(token, repo, "", proxies)
29
- write_file_structure_to_json(full_structure, output_path)
30
- print(f"Full file structure for repository '{repo}' has been indexed and saved to {output_path}")
31
-
 
 
 
 
1
  import json
2
+ import logging
3
  from hf_scrapper import get_system_proxies, get_file_structure, write_file_structure_to_json
4
  from dotenv import load_dotenv
5
  import os
 
7
  load_dotenv()
8
 
9
  def index_repository(token, repo, current_path="", proxies=None):
10
+ try:
11
+ file_structure = get_file_structure(repo, token, current_path, proxies)
12
+ full_structure = []
13
+ for item in file_structure:
14
+ if item['type'] == 'directory':
15
+ sub_directory_structure = index_repository(token, repo, item['path'], proxies)
16
+ full_structure.append({
17
+ "type": "directory",
18
+ "path": item['path'],
19
+ "contents": sub_directory_structure
20
+ })
21
+ else:
22
+ full_structure.append(item)
23
+ return full_structure
24
+ except Exception as e:
25
+ logging.error(f"Error indexing repository: {e}")
26
+ raise
27
 
28
  def indexer():
29
  token = os.getenv("TOKEN")
30
  repo = os.getenv("REPO")
31
  output_path = os.getenv("INDEX_FILE")
32
+
33
+ if not token or not repo or not output_path:
34
+ logging.error("Environment variables TOKEN, REPO, or INDEX_FILE are not set.")
35
+ return
36
+
37
  proxies = get_system_proxies()
38
+
39
+ try:
40
+ full_structure = index_repository(token, repo, "", proxies)
41
+ write_file_structure_to_json(full_structure, output_path)
42
+ logging.info(f"Full file structure for repository '{repo}' has been indexed and saved to {output_path}")
43
+ except Exception as e:
44
+ logging.error(f"Error during indexing: {e}")
requirements.txt CHANGED
@@ -1,5 +1,7 @@
1
  fastapi
2
  aiofiles
 
 
3
  uvicorn
4
  requests
5
  python-dotenv
 
1
  fastapi
2
  aiofiles
3
+ aiohttp
4
+ tqdm
5
  uvicorn
6
  requests
7
  python-dotenv