ChandimaPrabath committed on
Commit
40d796b
·
1 Parent(s): 6be365f
Files changed (1) hide show
  1. hf_scrapper.py +15 -6
hf_scrapper.py CHANGED
@@ -2,6 +2,7 @@ import os
2
  import requests
3
  import json
4
  import urllib.request
 
5
  from requests.exceptions import RequestException
6
  from tqdm import tqdm
7
 
@@ -49,7 +50,7 @@ def download_file(file_url, token, cache_path, proxies, film_id, title, chunk_si
49
  response.raise_for_status()
50
 
51
  total_size = int(response.headers.get('content-length', 0))
52
- download_progress[film_id] = {"total": total_size, "downloaded": 0, "status": "Downloading"}
53
 
54
  os.makedirs(os.path.dirname(cache_path), exist_ok=True)
55
  with open(cache_path, 'wb') as file, tqdm(total=total_size, unit='B', unit_scale=True, desc=cache_path) as pbar:
@@ -68,9 +69,8 @@ def download_file(file_url, token, cache_path, proxies, film_id, title, chunk_si
68
  print(f"Error writing file {cache_path}: {e}")
69
  download_progress[film_id]["status"] = "Failed"
70
  finally:
71
- # Instead of deleting the progress, we mark it as complete or failed
72
  if download_progress[film_id]["status"] != "Downloading":
73
- del download_progress[film_id]
74
 
75
  def get_download_progress(film_id):
76
  """
@@ -80,15 +80,24 @@ def get_download_progress(film_id):
80
  film_id (str): The unique identifier for the film download.
81
 
82
  Returns:
83
- dict: A dictionary containing the total size, downloaded size, progress percentage, and status.
84
  """
85
  if film_id in download_progress:
86
  total = download_progress[film_id]["total"]
87
  downloaded = download_progress[film_id]["downloaded"]
88
  status = download_progress[film_id].get("status", "In Progress")
89
  progress = (downloaded / total) * 100 if total > 0 else 0
90
- return {"total": total, "downloaded": downloaded, "progress": progress, "status": status}
91
- return {"total": 0, "downloaded": 0, "progress": 0, "status": "Not Found"}
 
 
 
 
 
 
 
 
 
92
 
93
  def update_film_store_json(title, cache_path):
94
  """
 
2
  import requests
3
  import json
4
  import urllib.request
5
+ import time
6
  from requests.exceptions import RequestException
7
  from tqdm import tqdm
8
 
 
50
  response.raise_for_status()
51
 
52
  total_size = int(response.headers.get('content-length', 0))
53
+ download_progress[film_id] = {"total": total_size, "downloaded": 0, "status": "Downloading", "start_time": time.time()}
54
 
55
  os.makedirs(os.path.dirname(cache_path), exist_ok=True)
56
  with open(cache_path, 'wb') as file, tqdm(total=total_size, unit='B', unit_scale=True, desc=cache_path) as pbar:
 
69
  print(f"Error writing file {cache_path}: {e}")
70
  download_progress[film_id]["status"] = "Failed"
71
  finally:
 
72
  if download_progress[film_id]["status"] != "Downloading":
73
+ download_progress[film_id]["end_time"] = time.time()
74
 
75
  def get_download_progress(film_id):
76
  """
 
80
  film_id (str): The unique identifier for the film download.
81
 
82
  Returns:
83
+ dict: A dictionary containing the total size, downloaded size, progress percentage, status, and ETA.
84
  """
85
  if film_id in download_progress:
86
  total = download_progress[film_id]["total"]
87
  downloaded = download_progress[film_id]["downloaded"]
88
  status = download_progress[film_id].get("status", "In Progress")
89
  progress = (downloaded / total) * 100 if total > 0 else 0
90
+
91
+ eta = None
92
+ if status == "Downloading" and downloaded > 0:
93
+ elapsed_time = time.time() - download_progress[film_id]["start_time"]
94
+ estimated_total_time = elapsed_time * (total / downloaded)
95
+ eta = estimated_total_time - elapsed_time
96
+ elif status == "Completed":
97
+ eta = 0
98
+
99
+ return {"total": total, "downloaded": downloaded, "progress": progress, "status": status, "eta": eta}
100
+ return {"total": 0, "downloaded": 0, "progress": 0, "status": "Not Found", "eta": None}
101
 
102
  def update_film_store_json(title, cache_path):
103
  """