instance1 / hf_scrapper.py
ChandimaPrabath's picture
init player
f0de3f7
raw
history blame
2.5 kB
import os
import requests
import json
import urllib.request
from requests.exceptions import RequestException
def get_system_proxies():
    """Return the system proxy configuration as a ``requests``-style mapping.

    The proxies are read with ``urllib.request.getproxies()`` and reduced to
    the two schemes this module uses.

    Returns:
        A dict with the keys ``"http"`` and ``"https"``. A value is ``None``
        when no proxy is configured for that scheme. An empty dict is
        returned if the lookup itself fails.
    """
    try:
        proxies = urllib.request.getproxies()
        print("System proxies:", proxies)
        http_proxy = proxies.get("http")
        return {
            "http": http_proxy,
            # Use the proxy configured for HTTPS when there is one. Falling
            # back to the HTTP proxy keeps the previous result for systems
            # that only configure an HTTP proxy.
            "https": proxies.get("https") or http_proxy,
        }
    except Exception as e:
        # Best-effort lookup: report the problem and continue without proxies.
        print(f"Error getting system proxies: {e}")
        return {}
def stream_file(file_url, token, proxies):
    """Yield the content of ``file_url`` in chunks of up to 8192 bytes.

    This is a generator, so nothing (including the log line and the HTTP
    request) happens until the caller starts iterating.

    Args:
        file_url: URL of the file to fetch.
        token: Bearer token sent in the ``Authorization`` header.
        proxies: Proxy mapping passed straight to ``requests.get``.

    Yields:
        Non-empty ``bytes`` chunks of the response body. If a request error
        occurs, a single empty ``b''`` is yielded as an error marker and the
        generator ends.
    """
    print(f"Streaming file from URL: {file_url} with proxies: {proxies}")
    try:
        # The context manager closes the streamed response even when the
        # consumer stops iterating early or an error interrupts the loop;
        # without it the underlying connection is not released.
        with requests.get(
            file_url,
            headers={'Authorization': f'Bearer {token}'},
            proxies=proxies,
            stream=True,
        ) as response:
            response.raise_for_status()
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    yield chunk
    except RequestException as e:
        print(f"Error streaming file: {e}")
        yield b''  # Return empty bytes to indicate an error
def download_file(file_url, token, output_path, proxies):
    """Download ``file_url`` to ``output_path``, streaming it in 8192-byte chunks.

    Errors are reported on stdout rather than raised, so the function always
    returns ``None``.

    Args:
        file_url: URL of the file to fetch.
        token: Bearer token sent in the ``Authorization`` header.
        output_path: Destination path; opened in binary write mode.
        proxies: Proxy mapping passed straight to ``requests.get``.
    """
    print(f"Downloading file from URL: {file_url} with proxies: {proxies}")
    try:
        # The context manager closes the streamed response on every exit
        # path, including a failed write, so the connection is not leaked.
        with requests.get(
            file_url,
            headers={'Authorization': f'Bearer {token}'},
            proxies=proxies,
            stream=True,
        ) as response:
            response.raise_for_status()
            # The file is only created once the server has answered with a
            # success status.
            with open(output_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        print(f'File {output_path} downloaded successfully.')
    except RequestException as e:
        print(f"Error downloading file: {e}")
    except IOError as e:
        print(f"Error writing file {output_path}: {e}")
def get_file_structure(repo, token, path="", proxies=None):
    """Fetch the file listing of a Hugging Face model repository path.

    Queries ``https://huggingface.co/api/models/{repo}/tree/main/{path}``.

    Args:
        repo: Repository identifier interpolated into the API URL.
        token: Bearer token sent in the ``Authorization`` header.
        path: Path inside the repository; the default ``""`` targets the
            root of the ``main`` tree.
        proxies: Optional proxy mapping passed straight to ``requests.get``.

    Returns:
        The decoded JSON body of the response, or an empty list if the
        request fails or the body is not valid JSON.
    """
    api_url = f"https://huggingface.co/api/models/{repo}/tree/main/{path}"
    headers = {'Authorization': f'Bearer {token}'}
    print(f"Fetching file structure from URL: {api_url} with proxies: {proxies}")
    try:
        response = requests.get(api_url, headers=headers, proxies=proxies)
        response.raise_for_status()
        return response.json()
    except (RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON: depending on the
        # installed requests version, the decode error is not always a
        # RequestException and would otherwise escape this handler.
        print(f"Error fetching file structure: {e}")
        return []
def write_file_structure_to_json(file_structure, file_path):
    """Serialize ``file_structure`` as JSON (2-space indent) into ``file_path``.

    Args:
        file_structure: JSON-serializable object to store.
        file_path: Destination path; opened in text write mode, replacing
            any existing content.

    I/O failures are reported on stdout instead of being raised.
    """
    try:
        with open(file_path, mode='w') as out:
            json.dump(file_structure, out, indent=2)
        print(f'File structure written to {file_path}')
    except IOError as err:
        print(f"Error writing file structure to JSON: {err}")