Spaces:
Paused
Paused
Commit
·
40d796b
1
Parent(s):
6be365f
update
Browse files- hf_scrapper.py +15 -6
hf_scrapper.py
CHANGED
@@ -2,6 +2,7 @@ import os
|
|
2 |
import requests
|
3 |
import json
|
4 |
import urllib.request
|
|
|
5 |
from requests.exceptions import RequestException
|
6 |
from tqdm import tqdm
|
7 |
|
@@ -49,7 +50,7 @@ def download_file(file_url, token, cache_path, proxies, film_id, title, chunk_si
|
|
49 |
response.raise_for_status()
|
50 |
|
51 |
total_size = int(response.headers.get('content-length', 0))
|
52 |
-
download_progress[film_id] = {"total": total_size, "downloaded": 0, "status": "Downloading"}
|
53 |
|
54 |
os.makedirs(os.path.dirname(cache_path), exist_ok=True)
|
55 |
with open(cache_path, 'wb') as file, tqdm(total=total_size, unit='B', unit_scale=True, desc=cache_path) as pbar:
|
@@ -68,9 +69,8 @@ def download_file(file_url, token, cache_path, proxies, film_id, title, chunk_si
|
|
68 |
print(f"Error writing file {cache_path}: {e}")
|
69 |
download_progress[film_id]["status"] = "Failed"
|
70 |
finally:
|
71 |
-
# Instead of deleting the progress, we mark it as complete or failed
|
72 |
if download_progress[film_id]["status"] != "Downloading":
|
73 |
-
|
74 |
|
75 |
def get_download_progress(film_id):
|
76 |
"""
|
@@ -80,15 +80,24 @@ def get_download_progress(film_id):
|
|
80 |
film_id (str): The unique identifier for the film download.
|
81 |
|
82 |
Returns:
|
83 |
-
dict: A dictionary containing the total size, downloaded size, progress percentage, and status.
|
84 |
"""
|
85 |
if film_id in download_progress:
|
86 |
total = download_progress[film_id]["total"]
|
87 |
downloaded = download_progress[film_id]["downloaded"]
|
88 |
status = download_progress[film_id].get("status", "In Progress")
|
89 |
progress = (downloaded / total) * 100 if total > 0 else 0
|
90 |
-
|
91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
|
93 |
def update_film_store_json(title, cache_path):
|
94 |
"""
|
|
|
2 |
import requests
|
3 |
import json
|
4 |
import urllib.request
|
5 |
+
import time
|
6 |
from requests.exceptions import RequestException
|
7 |
from tqdm import tqdm
|
8 |
|
|
|
50 |
response.raise_for_status()
|
51 |
|
52 |
total_size = int(response.headers.get('content-length', 0))
|
53 |
+
download_progress[film_id] = {"total": total_size, "downloaded": 0, "status": "Downloading", "start_time": time.time()}
|
54 |
|
55 |
os.makedirs(os.path.dirname(cache_path), exist_ok=True)
|
56 |
with open(cache_path, 'wb') as file, tqdm(total=total_size, unit='B', unit_scale=True, desc=cache_path) as pbar:
|
|
|
69 |
print(f"Error writing file {cache_path}: {e}")
|
70 |
download_progress[film_id]["status"] = "Failed"
|
71 |
finally:
|
|
|
72 |
if download_progress[film_id]["status"] != "Downloading":
|
73 |
+
download_progress[film_id]["end_time"] = time.time()
|
74 |
|
75 |
def get_download_progress(film_id):
|
76 |
"""
|
|
|
80 |
film_id (str): The unique identifier for the film download.
|
81 |
|
82 |
Returns:
|
83 |
+
dict: A dictionary containing the total size, downloaded size, progress percentage, status, and ETA.
|
84 |
"""
|
85 |
if film_id in download_progress:
|
86 |
total = download_progress[film_id]["total"]
|
87 |
downloaded = download_progress[film_id]["downloaded"]
|
88 |
status = download_progress[film_id].get("status", "In Progress")
|
89 |
progress = (downloaded / total) * 100 if total > 0 else 0
|
90 |
+
|
91 |
+
eta = None
|
92 |
+
if status == "Downloading" and downloaded > 0:
|
93 |
+
elapsed_time = time.time() - download_progress[film_id]["start_time"]
|
94 |
+
estimated_total_time = elapsed_time * (total / downloaded)
|
95 |
+
eta = estimated_total_time - elapsed_time
|
96 |
+
elif status == "Completed":
|
97 |
+
eta = 0
|
98 |
+
|
99 |
+
return {"total": total, "downloaded": downloaded, "progress": progress, "status": status, "eta": eta}
|
100 |
+
return {"total": 0, "downloaded": 0, "progress": 0, "status": "Not Found", "eta": None}
|
101 |
|
102 |
def update_film_store_json(title, cache_path):
|
103 |
"""
|