# HF-User-Sizer / app.py
# SaisExperiments's picture
# Create app.py
# d098629 verified
# Names exported by `from <module> import *`: the Gradio interface object and
# the coroutine that computes the size table.
__all__ = ['iface', 'calculate_total_size']
import asyncio
from typing import List, Tuple
from urllib.parse import quote, urlencode

import aiohttp
import gradio as gr
async def get_repo_size(session, repo, revision='main'):
    """Return the size of one model repository in gigabytes.

    Requests the Hub ``treesize`` endpoint for ``repo`` at ``revision`` and
    divides the reported ``size`` field by 1e9 (the field is presumably a
    byte count -- confirm against the Hub API).

    Args:
        session: Open HTTP session whose ``get(url)`` is an async context
            manager yielding a response with ``status`` and ``json()``
            (an ``aiohttp.ClientSession`` in this file).
        repo: Repository id such as ``"author/model"``.
        revision: Branch, tag or ref to measure. Defaults to ``"main"``,
            which is what was previously hard-coded.

    Returns:
        The size as a float, or ``0.0`` when the endpoint does not answer
        with HTTP 200 or any error occurs. Failures are printed, never raised.
    """
    # Percent-encode both path parts: the "/" between owner and name must
    # survive, while a "/" inside a revision (e.g. "refs/pr/1") must not be
    # read as an extra path segment.
    url = (
        'https://huggingface.co/api/models/'
        f"{quote(repo, safe='/')}/treesize/{quote(revision, safe='')}"
    )
    try:
        async with session.get(url) as resp:
            if resp.status != 200:
                print(f'Did not find repo: {repo}, Status code: {resp.status}')
                return 0.0
            resp_json = await resp.json()
            return resp_json['size'] / 1e9
    except aiohttp.ClientError as e:
        print(f"HTTP error for {repo}: {e}")
        return 0.0
    except Exception as e:
        # Deliberately broad: a malformed payload or timeout for one repository
        # yields 0.0 instead of propagating to whoever awaits this coroutine.
        print(f"An error occurred for {repo}: {e}")
        return 0.0
async def fetch_all_models(session, author, repo_limit=20000):
    """List an author's models from the Hub API, at most ``repo_limit`` of them.

    Pages through ``/api/models`` 100 entries at a time using the ``limit``
    and ``skip`` query parameters until a page comes back empty or short, or
    until ``repo_limit`` entries have been collected.

    Args:
        session: Open HTTP session whose ``get(url)`` is an async context
            manager yielding a response with ``status`` and ``json()``
            (an ``aiohttp.ClientSession`` in this file).
        author: Account name to filter on. It is URL-encoded before being
            placed in the query string.
        repo_limit: Maximum number of model entries to return.

    Returns:
        A list of the decoded model entries, never longer than ``repo_limit``.
        An empty list is returned both when the author has no models and when
        any page request answers with a non-200 status (pages already
        collected are discarded in that case).
    """
    models_per_page = 100
    all_models = []
    page = 0
    while len(all_models) < repo_limit:
        # urlencode keeps characters such as "&" or "=" in the user-supplied
        # author from being parsed as additional query parameters.
        query = urlencode({
            'author': author,
            'full': 'true',
            'config': 'true',
            'limit': models_per_page,
            'skip': page * models_per_page,
        })
        models_url = f'https://huggingface.co/api/models?{query}'
        async with session.get(models_url) as response:
            if response.status != 200:
                print(f"Error fetching models: {response.status}")
                return []
            models_data = await response.json()
        if not models_data:
            break
        all_models.extend(models_data)
        page += 1
        if len(models_data) < models_per_page:
            # A short page means there is nothing further to request.
            break
    if len(all_models) >= repo_limit:
        print(f"Reached repository limit of {repo_limit} for author '{author}'.")
    # Truncate on every exit path: a short final page can still push the
    # collection past repo_limit.
    return all_models[:repo_limit]
async def calculate_total_size(author, repo_limit=20000):
    """Build the result table of repository sizes for one Hub author.

    Lists the author's models, skips every repository whose id contains
    ``exl2`` (case-insensitive), fetches the remaining sizes concurrently and
    sums them.

    Args:
        author: Account name as typed by the user. Surrounding whitespace is
            ignored.
        repo_limit: Maximum number of repositories to list.

    Returns:
        A list of two-element rows ``[label, value]``:

        * one row per sized repository whose reported size is greater than 0,
        * ``"Total Repositories Fetched"`` with the number of listed models
          (exl2 repositories included),
        * ``"Total Size (non-exl2)"`` with the summed size,
        * ``"API Repo Limit Reached"`` only when the reported total count
          exceeds the number of models listed.

        A single message row is returned instead when the author is blank or
        when listing the models yields nothing.
    """
    author = (author or '').strip()
    if not author:
        # A blank name cannot identify an account; stop before any request is
        # made rather than sending an empty author filter to the API.
        return [["No author provided", ""]]

    async with aiohttp.ClientSession() as session:
        all_models = await fetch_all_models(session, author, repo_limit)
        if not all_models:
            return [["Error fetching models", ""]]

        total_repos_fetched = len(all_models)
        total_repos_all = await fetch_total_repo_count(session, author)

        relevant_ids = [
            model['modelId']
            for model in all_models
            if 'exl2' not in model['modelId'].lower()
        ]
        # gather preserves argument order, so sizes line up with relevant_ids.
        sizes = await asyncio.gather(
            *(get_repo_size(session, repo_id) for repo_id in relevant_ids)
        )

    # A size of 0 marks a repository whose lookup failed; leave it out.
    repo_sizes = [
        [repo_id, size] for repo_id, size in zip(relevant_ids, sizes) if size > 0
    ]
    total_size_gb = sum(size for _, size in repo_sizes)

    repo_sizes.append(["Total Repositories Fetched", total_repos_fetched])
    repo_sizes.append(["Total Size (non-exl2)", total_size_gb])
    if total_repos_all > total_repos_fetched:
        repo_sizes.append([
            "API Repo Limit Reached",
            f"Fetched {total_repos_fetched} out of {total_repos_all} available models.",
        ])
    return repo_sizes
async def fetch_total_repo_count(session, author):
    """Return the total model count reported for ``author``, or 0 if unknown.

    Issues a one-item listing request and reads the ``x-total-count``
    response header.

    Args:
        session: Open HTTP session whose ``get(url)`` is an async context
            manager yielding a response with ``status`` and ``headers``
            (an ``aiohttp.ClientSession`` in this file).
        author: Account name to filter on. It is URL-encoded before being
            placed in the query string.

    Returns:
        The header value as an int. ``0`` when the status is not 200, the
        header is absent or empty, or its value is not an integer.
    """
    # urlencode keeps characters such as "&" or "=" in the user-supplied
    # author from being parsed as additional query parameters.
    query = urlencode({'author': author, 'full': 'false', 'limit': 1})
    url = f'https://huggingface.co/api/models?{query}'
    async with session.get(url) as response:
        if response.status != 200:
            print(f"Error fetching total repo count: {response.status}")
            return 0
        count_str = response.headers.get('x-total-count')

    if not count_str:
        return 0
    try:
        return int(count_str)
    except ValueError:
        # A malformed header should not abort the whole size calculation.
        print(f"Unexpected x-total-count header value: {count_str!r}")
        return 0
# Gradio UI definition: a single text input for the author name and a
# two-column table for the rows returned by calculate_total_size.
iface = gr.Interface(
    # calculate_total_size is a coroutine function; asyncio.run drives it to
    # completion each time this callable is invoked.
    fn=lambda author: asyncio.run(calculate_total_size(author)),
    inputs=gr.Text(value="bartowski"),  # default value of the text box
    outputs=gr.Dataframe(
        headers=["Repository/Info", "Size (GB) / Count"],
        value=[["", ""]],  # initial single blank row
    ),
    title="HuggingFace User Size Calculator",
    description="Enter a HuggingFace author to calculate the total size of their non-exl2 repositories. Limited to 20,000 Repos. Janked by Gemini-1121"
)
# NOTE: launch() is called unconditionally at module level, so it also runs
# when this file is imported rather than executed directly.
iface.launch(height=500, width=600)