# HuggingFace author repo-size calculator (Gradio Spaces app).
# (Removed web-page scrape residue that preceded the code: "Spaces: / Sleeping / Sleeping".)
__all__ = ['iface', 'calculate_total_size'] | |
import gradio as gr | |
import asyncio | |
import aiohttp | |
from typing import List, Tuple | |
async def get_repo_size(session, repo):
    """Return the size of a HuggingFace model repo in decimal gigabytes.

    Queries the ``treesize`` endpoint for the repo's ``main`` revision.

    Args:
        session: aiohttp.ClientSession used for the request.
        repo: model id, e.g. ``"author/name"``.

    Returns:
        Size in GB (``size / 1e9``) as a float, or 0.0 on any failure
        (non-200 status, network error, bad/missing payload).
    """
    url = f'https://huggingface.co/api/models/{repo}/treesize/main'
    try:
        async with session.get(url) as resp:
            if resp.status == 200:
                resp_json = await resp.json()
                return resp_json['size'] / 1e9
            else:
                print(f'Did not find repo: {repo}, Status code: {resp.status}')
                return 0.0
    except aiohttp.ClientError as e:
        print(f"HTTP error for {repo}: {e}")
        return 0.0
    except Exception as e:
        # Deliberate best-effort catch-all (e.g. invalid JSON, missing
        # 'size' key) so one bad repo can't sink the whole batch.
        print(f"An error occurred for {repo}: {e}")
        return 0.0
async def fetch_all_models(session, author, repo_limit=20000):
    """Page through the HF models API and collect *author*'s models.

    Args:
        session: aiohttp.ClientSession used for the requests.
        author: HuggingFace user/org name.
        repo_limit: hard cap on the number of models returned.

    Returns:
        List of model dicts (raw API JSON), possibly truncated to
        ``repo_limit``; an empty list if any page request fails.
    """
    all_models = []
    page = 0
    models_per_page = 100
    while len(all_models) < repo_limit:
        models_url = (
            f'https://huggingface.co/api/models?author={author}'
            f'&full=true&config=true&limit={models_per_page}'
            f'&skip={page * models_per_page}'
        )
        async with session.get(models_url) as response:
            if response.status == 200:
                models_data = await response.json()
                if not models_data:
                    break
                all_models.extend(models_data)
                page += 1
                # A short page means this was the last one.
                if len(models_data) < models_per_page:
                    break
            else:
                # NOTE(review): an error mid-pagination discards models
                # already fetched; original behavior preserved.
                print(f"Error fetching models: {response.status}")
                return []
        if len(all_models) >= repo_limit:
            print(f"Reached repository limit of {repo_limit} for author '{author}'.")
            all_models = all_models[:repo_limit]
            break
    return all_models
async def calculate_total_size(author, repo_limit=20000):
    """Build a [label, value] table of per-repo sizes for *author*.

    Skips repos whose id contains "exl2" (case-insensitive). Produces
    one row per repo with a positive size, then summary rows: total
    repos fetched, total size in GB, and — when the API listing was
    truncated — a warning row. On a listing failure returns a single
    error row so the Gradio Dataframe still renders.

    Args:
        author: HuggingFace user/org name.
        repo_limit: forwarded to fetch_all_models as the listing cap.

    Returns:
        List of two-element lists suitable for a gr.Dataframe.
    """
    async with aiohttp.ClientSession() as session:
        all_models = await fetch_all_models(session, author, repo_limit)
        if not all_models:
            return [["Error fetching models", ""]]
        total_repos_fetched = len(all_models)
        total_repos_all = await fetch_total_repo_count(session, author)
        tasks = []
        relevant_models = []
        for model in all_models:
            repo_id = model['modelId']
            # Exclude exl2 quantizations anywhere in the repo id.
            if 'exl2' not in repo_id.lower():
                tasks.append(get_repo_size(session, repo_id))
                relevant_models.append(model)
        # Fetch all sizes concurrently; order matches relevant_models.
        repo_sizes_all = await asyncio.gather(*tasks)
        repo_sizes = []
        total_size_gb = 0
        for i, repo_size in enumerate(repo_sizes_all):
            if repo_size > 0:  # 0.0 means "size unavailable" — omit the row
                repo_sizes.append([relevant_models[i]['modelId'], repo_size])
                total_size_gb += repo_size
        repo_sizes.append(["Total Repositories Fetched", total_repos_fetched])
        repo_sizes.append(["Total Size (non-exl2)", total_size_gb])
        if total_repos_all > total_repos_fetched:
            repo_sizes.append([
                "API Repo Limit Reached",
                f"Fetched {total_repos_fetched} out of {total_repos_all} available models.",
            ])
        return repo_sizes
async def fetch_total_repo_count(session, author):
    """Return the total number of model repos *author* has on the Hub.

    Issues a ``limit=1`` listing request and reads the
    ``x-total-count`` response header.

    Args:
        session: aiohttp.ClientSession used for the request.
        author: HuggingFace user/org name.

    Returns:
        The count as an int, or 0 when the request fails or the
        header is absent/empty.
    """
    url = f'https://huggingface.co/api/models?author={author}&full=false&limit=1'
    async with session.get(url) as response:
        if response.status == 200:
            count_str = response.headers.get('x-total-count')
            return int(count_str) if count_str else 0
        else:
            print(f"Error fetching total repo count: {response.status}")
            return 0
# Gradio UI: author name in, per-repo size table out.
# asyncio.run is used per call because Interface.fn must be synchronous here.
iface = gr.Interface(
    fn=lambda author: asyncio.run(calculate_total_size(author)),
    inputs=gr.Text(value="bartowski"),
    outputs=gr.Dataframe(
        headers=["Repository/Info", "Size (GB) / Count"],
        value=[["", ""]],
    ),
    title="HuggingFace User Size Calculator",
    description="Enter a HuggingFace author to calculate the total size of their non-exl2 repositories. Limited to 20,000 Repos. Janked by Gemini-1121"
)
# NOTE(review): height/width on launch() are deprecated in recent Gradio
# versions — confirm against the pinned gradio release before changing.
iface.launch(height=500, width=600)