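"""Download files from a Hugging Face repository in parallel, routing
traffic through any system-configured HTTP(S) proxies. Reads a Hugging
Face access token from the TOKEN environment variable."""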
import os
import requests
import json
import urllib.parse
import urllib.request
from requests.exceptions import RequestException
from concurrent.futures import ThreadPoolExecutor

def get_system_proxies():
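    """Read the OS proxy settings and return them as a requests-style dict."""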
    try:
        proxies = urllib.request.getproxies()
        print("System proxies:", proxies)
        return {
            "http": proxies.get("http"),
            "https": proxies.get("https")
        }
    except Exception as e:
        print(f"Error getting system proxies: {e}")
        return {}

def download_and_cache_file(file_url, token, cache_path, proxies=None):
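    """Stream the file at `file_url` to `cache_path`, sending `token` as a
    bearer token. Returns True on success, False otherwise."""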
    print(f"Downloading file from URL: {file_url} to {cache_path} with proxies: {proxies}")

    # Create a requests session for better performance
    session = requests.Session()
    session.headers.update({'Authorization': f'Bearer {token}'})
    session.proxies.update(proxies or {})  # tolerate the default proxies=None

    try:
        # Stream the body so large files are never fully buffered in memory
        response = session.get(file_url, stream=True, timeout=30)
        response.raise_for_status()
        os.makedirs(os.path.dirname(cache_path), exist_ok=True)

        with open(cache_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=16384):  # Larger chunk size
                if chunk:
                    f.write(chunk)
        print(f'File cached to {cache_path} successfully.')
        return True
    except RequestException as e:
        print(f"Error downloading file: {e}")
    except IOError as e:
        print(f"Error writing file {cache_path}: {e}")
    return False

def get_file_structure(repo, token, path="", proxies=None):
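    """Fetch the file listing for `repo` at `path` from the Hugging Face tree API."""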
    api_url = f"https://huggingface.co/api/models/{repo}/tree/main/{path}"
    headers = {'Authorization': f'Bearer {token}'}
    print(f"Fetching file structure from URL: {api_url} with proxies: {proxies}")

    try:
        response = requests.get(api_url, headers=headers, proxies=proxies, timeout=30)
        response.raise_for_status()
        return response.json()
    except RequestException as e:
        print(f"Error fetching file structure: {e}")
        return []

def write_file_structure_to_json(file_structure, file_path):
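    """Write a fetched file structure to `file_path` as indented JSON."""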
    try:
        with open(file_path, 'w') as json_file:
            json.dump(file_structure, json_file, indent=2)
        print(f'File structure written to {file_path}')
    except IOError as e:
        print(f"Error writing file structure to JSON: {e}")

def parallel_downloads(file_urls, token, cache_dir, proxies=None):
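    """Download every URL in `file_urls` concurrently, one thread per file."""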
    with ThreadPoolExecutor() as executor:
        futures = []
        for file_url in file_urls:
            # The URL path is percent-encoded; decode it for a readable filename
            filename = urllib.parse.unquote(file_url.split("/")[-1])
            cache_path = os.path.join(cache_dir, filename)
            futures.append(executor.submit(download_and_cache_file, file_url, token, cache_path, proxies))
        # Wait for all futures to complete
        for future in futures:
            future.result()

if __name__ == "__main__":
    file_urls = [
        "https://huggingface.co/Unicone-Studio/jellyfin_media/resolve/main/films/Funky%20Monkey%202004/Funky%20Monkey%20(2004)%20Web-dl%201080p.mp4"
    ]
    token = os.getenv("TOKEN")
    if not token:
        print("Warning: TOKEN environment variable is not set; authenticated downloads will fail.")
    cache_dir = "tmp/cache/films"
    proxies = get_system_proxies()

    parallel_downloads(file_urls, token, cache_dir, proxies)