# Hugging Face Space metadata (extraction artifact) — original header read:
#   Spaces: Paused
import json
import os
import urllib.request
from concurrent.futures import ThreadPoolExecutor

import requests
from requests.exceptions import RequestException
def get_system_proxies():
    """Read the system-level proxy configuration.

    Returns:
        dict: ``{"http": ..., "https": ...}`` (values may be ``None`` when a
        scheme has no proxy configured), or ``{}`` if the lookup itself fails.
    """
    try:
        proxies = urllib.request.getproxies()
        print("System proxies:", proxies)
        return {
            "http": proxies.get("http"),
            # Bug fix: this previously reused the "http" entry, silently
            # ignoring a distinct HTTPS proxy setting.
            "https": proxies.get("https"),
        }
    except Exception as e:
        print(f"Error getting system proxies: {e}")
        return {}
def download_and_cache_file(file_url, token, cache_path, proxies=None):
    """Stream a remote file to ``cache_path`` using a bearer token.

    Args:
        file_url: Direct URL of the file to download.
        token: Access token sent as an ``Authorization: Bearer`` header.
        cache_path: Local destination path; parent directories are created.
        proxies: Optional requests-style proxy mapping (may be ``None``).

    Returns:
        bool: ``True`` on success, ``False`` on any download or write error.
    """
    print(f"Downloading file from URL: {file_url} to {cache_path} with proxies: {proxies}")
    # Create a requests session for better performance; the `with` block
    # guarantees the underlying connection pool is released.
    with requests.Session() as session:
        session.headers.update({'Authorization': f'Bearer {token}'})
        if proxies:
            # Bug fix: `session.proxies.update(None)` raises TypeError when
            # the caller relies on the declared default of None.
            session.proxies.update(proxies)
        try:
            # A timeout keeps a stalled connection from hanging the worker
            # forever: 10s to connect, 60s between received chunks.
            response = session.get(file_url, stream=True, timeout=(10, 60))
            response.raise_for_status()
            parent_dir = os.path.dirname(cache_path)
            if parent_dir:  # makedirs("") raises when the path has no dir part
                os.makedirs(parent_dir, exist_ok=True)
            with open(cache_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=16384):  # Larger chunk size
                    if chunk:
                        f.write(chunk)
            print(f'File cached to {cache_path} successfully.')
            return True
        except RequestException as e:
            print(f"Error downloading file: {e}")
        except IOError as e:
            print(f"Error writing file {cache_path}: {e}")
        return False
def get_file_structure(repo, token, path="", proxies=None):
    """List the entries under ``path`` in a Hugging Face model repo.

    Args:
        repo: Repository id, e.g. ``"org/name"``.
        token: Access token sent as an ``Authorization: Bearer`` header.
        path: Sub-path inside the repo tree ("" for the root).
        proxies: Optional requests-style proxy mapping.

    Returns:
        list: Entry dicts from the HF ``tree/main`` API, or ``[]`` on error.
    """
    api_url = f"https://huggingface.co/api/models/{repo}/tree/main/{path}"
    headers = {'Authorization': f'Bearer {token}'}
    print(f"Fetching file structure from URL: {api_url} with proxies: {proxies}")
    try:
        # A timeout prevents a stalled API call from blocking the caller
        # indefinitely; requests.Timeout is a RequestException, so the
        # existing handler below still catches it.
        response = requests.get(api_url, headers=headers, proxies=proxies, timeout=30)
        response.raise_for_status()
        return response.json()
    except RequestException as e:
        print(f"Error fetching file structure: {e}")
        return []
def write_file_structure_to_json(file_structure, file_path):
    """Serialize ``file_structure`` to ``file_path`` as 2-space-indented JSON.

    Errors are reported to stdout rather than raised.
    """
    try:
        with open(file_path, 'w') as out:
            json.dump(file_structure, out, indent=2)
    except IOError as e:
        print(f"Error writing file structure to JSON: {e}")
    else:
        print(f'File structure written to {file_path}')
# Function to download files in parallel
def parallel_downloads(file_urls, token, cache_dir, proxies=None):
    """Fetch every URL in ``file_urls`` into ``cache_dir`` concurrently.

    Each file is handed to :func:`download_and_cache_file` on a worker
    thread; the call blocks until all downloads have finished.
    """
    with ThreadPoolExecutor() as pool:
        tasks = [
            pool.submit(
                download_and_cache_file,
                url,
                token,
                os.path.join(cache_dir, url.rsplit("/", 1)[-1]),
                proxies,
            )
            for url in file_urls
        ]
        # Block until every task completes; result() re-raises any
        # unexpected worker exception.
        for task in tasks:
            task.result()
if __name__ == "__main__":
    # Demo driver: the access token comes from the TOKEN environment
    # variable, and a single sample file is cached under tmp/cache/films.
    token = os.getenv("TOKEN")
    proxies = get_system_proxies()
    cache_dir = "tmp/cache/films"
    file_urls = [
        "https://huggingface.co/Unicone-Studio/jellyfin_media/resolve/main/films/Funky%20Monkey%202004/Funky%20Monkey%20(2004)%20Web-dl%201080p.mp4"
    ]
    parallel_downloads(file_urls, token, cache_dir, proxies)