"""Download files from the Hugging Face Hub through the system proxies.

Fetches a repo's file tree via the HF API, and streams individual files to a
local cache directory, fanning downloads out across a thread pool.
"""

import json
import os
import urllib.request
from concurrent.futures import ThreadPoolExecutor

import requests
from requests.exceptions import RequestException


def get_system_proxies():
    """Return the system proxy configuration in requests' format.

    Returns:
        dict: {"http": ..., "https": ...} (values may be None when the
        corresponding proxy is unset), or {} if proxy detection fails.
    """
    try:
        proxies = urllib.request.getproxies()
        print("System proxies:", proxies)
        # BUG FIX: the original mapped the "https" scheme to the HTTP proxy
        # unconditionally (proxies.get("http")). Prefer the real HTTPS proxy
        # and fall back to the HTTP one only when none is configured.
        return {
            "http": proxies.get("http"),
            "https": proxies.get("https") or proxies.get("http"),
        }
    except Exception as e:
        print(f"Error getting system proxies: {e}")
        return {}


def download_and_cache_file(file_url, token, cache_path, proxies=None):
    """Stream ``file_url`` to ``cache_path`` using a bearer token.

    Args:
        file_url: Direct URL of the file to download.
        token: Hugging Face access token (sent as ``Authorization: Bearer``).
        cache_path: Destination path; parent directories are created as needed.
        proxies: Optional requests-style proxy mapping.

    Returns:
        bool: True on success, False on any download or write error.
    """
    print(f"Downloading file from URL: {file_url} to {cache_path} with proxies: {proxies}")
    # Create a requests session for better performance
    session = requests.Session()
    session.headers.update({'Authorization': f'Bearer {token}'})
    # BUG FIX: the original called session.proxies.update(proxies)
    # unconditionally, which raises TypeError when proxies is None
    # (the default value of the parameter).
    if proxies:
        session.proxies.update(proxies)
    try:
        # Timeout added: (connect, read) so a stalled server cannot hang
        # the worker thread forever.
        response = session.get(file_url, stream=True, timeout=(10, 300))
        response.raise_for_status()
        # "or '.'": os.makedirs('') raises when cache_path has no dir part.
        os.makedirs(os.path.dirname(cache_path) or '.', exist_ok=True)
        with open(cache_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=16384):  # Larger chunk size
                if chunk:
                    f.write(chunk)
        print(f'File cached to {cache_path} successfully.')
        return True
    except RequestException as e:
        print(f"Error downloading file: {e}")
    except IOError as e:
        print(f"Error writing file {cache_path}: {e}")
    finally:
        # FIX: the original leaked the Session (and its connection pool).
        session.close()
    return False


def get_file_structure(repo, token, path="", proxies=None):
    """Fetch the file tree of a Hugging Face model repo via the HF API.

    Args:
        repo: Repository identifier, e.g. "org/name".
        token: Hugging Face access token.
        path: Optional subpath within the repo tree.
        proxies: Optional requests-style proxy mapping.

    Returns:
        list: Parsed JSON tree listing, or [] on request failure.
    """
    api_url = f"https://huggingface.co/api/models/{repo}/tree/main/{path}"
    headers = {'Authorization': f'Bearer {token}'}
    print(f"Fetching file structure from URL: {api_url} with proxies: {proxies}")
    try:
        # Timeout added so a metadata call cannot hang indefinitely.
        response = requests.get(api_url, headers=headers, proxies=proxies, timeout=30)
        response.raise_for_status()
        return response.json()
    except RequestException as e:
        print(f"Error fetching file structure: {e}")
        return []


def write_file_structure_to_json(file_structure, file_path):
    """Serialize ``file_structure`` to ``file_path`` as pretty-printed JSON."""
    try:
        with open(file_path, 'w') as json_file:
            json.dump(file_structure, json_file, indent=2)
        print(f'File structure written to {file_path}')
    except IOError as e:
        print(f"Error writing file structure to JSON: {e}")


# Function to download files in parallel
def parallel_downloads(file_urls, token, cache_dir, proxies=None):
    """Download all ``file_urls`` into ``cache_dir`` concurrently.

    Each URL's final path segment (still percent-encoded) is used as the
    cached filename. Blocks until every download finishes; any exception
    raised by a worker propagates via ``future.result()``.
    """
    with ThreadPoolExecutor() as executor:
        futures = []
        for file_url in file_urls:
            filename = file_url.split("/")[-1]
            cache_path = os.path.join(cache_dir, filename)
            futures.append(
                executor.submit(download_and_cache_file, file_url, token, cache_path, proxies)
            )
        # Wait for all futures to complete
        for future in futures:
            future.result()


if __name__ == "__main__":
    file_urls = [
        "https://huggingface.co/Unicone-Studio/jellyfin_media/resolve/main/films/Funky%20Monkey%202004/Funky%20Monkey%20(2004)%20Web-dl%201080p.mp4"
    ]
    token = os.getenv("TOKEN")
    cache_dir = "tmp/cache/films"
    proxies = get_system_proxies()
    parallel_downloads(file_urls, token, cache_dir, proxies)