Spaces:

SaisExperiments
/

HF-User-Sizer

Sleeping

App Files Files Community

HF-User-Sizer / app.py

SaisExperiments

Create app.py

d098629 verified 3 months ago

raw

history blame contribute delete

4.02 kB

	# Names exported by `from <module> import *`: the Gradio interface object
	# and the coroutine that builds the size report.
	__all__ = ['iface', 'calculate_total_size']

	import gradio as gr
	import asyncio
	import aiohttp
	from typing import List, Tuple

	async def get_repo_size(session, repo):
	    """Look up the size of one model repository.

	    Queries the `treesize/main` endpoint for *repo* and returns the `size`
	    field of the JSON response divided by 1e9 (GB, assuming the API
	    reports bytes).

	    Args:
	        session: object whose `get(url)` yields an async context manager
	            exposing `status` and an awaitable `json()` (an
	            `aiohttp.ClientSession` in this file).
	        repo: repository id used verbatim in the URL path.

	    Returns:
	        The size as a float, or 0.0 when the request does not return
	        HTTP 200 or any exception is raised while fetching/parsing.
	    """
	    endpoint = f'https://huggingface.co/api/models/{repo}/treesize/main'
	    try:
	        async with session.get(endpoint) as resp:
	            # Guard clause: anything but 200 is reported and counted as 0.
	            if resp.status != 200:
	                print(f'Did not find repo: {repo}, Status code: {resp.status}')
	                return 0.0
	            payload = await resp.json()
	            return payload['size'] / 1e9
	    except aiohttp.ClientError as e:
	        # Transport-level failures get their own message.
	        print(f"HTTP error for {repo}: {e}")
	        return 0.0
	    except Exception as e:
	        # Best effort: a single bad repo must not abort the whole report.
	        print(f"An error occurred for {repo}: {e}")
	        return 0.0

	async def fetch_all_models(session, author, repo_limit=20000):
	    """Collect the model listings of *author*, page by page.

	    Requests pages of 100 entries from the Hub's `/api/models` endpoint
	    until a page comes back empty or short, or until `repo_limit` entries
	    have been collected.

	    Args:
	        session: object whose `get(url, params=...)` yields an async
	            context manager exposing `status` and an awaitable `json()`
	            (an `aiohttp.ClientSession` in this file).
	        author: account name to filter on. Sent as a query parameter so
	            that characters such as `&`, `#` or spaces are URL-encoded
	            instead of altering the request.
	        repo_limit: maximum number of entries to return.

	    Returns:
	        A list of at most `repo_limit` decoded JSON entries. An empty list
	        is returned when the author has no models, when `repo_limit` is
	        not positive, or when any page request answers with a non-200
	        status (entries gathered from earlier pages are discarded then).
	    """
	    models_url = 'https://huggingface.co/api/models'
	    models_per_page = 100
	    all_models = []
	    page = 0
	    while len(all_models) < repo_limit:
	        # NOTE(review): pagination relies on the endpoint honouring `skip`;
	        # confirm against the Hub API docs (cursor/Link-header paging may
	        # be the supported mechanism).
	        params = {
	            'author': author,
	            'full': 'true',
	            'config': 'true',
	            'limit': str(models_per_page),
	            'skip': str(page * models_per_page),
	        }
	        async with session.get(models_url, params=params) as response:
	            if response.status != 200:
	                print(f"Error fetching models: {response.status}")
	                return []
	            models_data = await response.json()
	        if not models_data:
	            break
	        all_models.extend(models_data)
	        page += 1
	        # Enforce the limit BEFORE the short-page exit: otherwise a short
	        # final page that overshoots `repo_limit` would be returned whole.
	        if len(all_models) >= repo_limit:
	            print(f"Reached repository limit of {repo_limit} for author '{author}'.")
	            all_models = all_models[:repo_limit]
	            break
	        if len(models_data) < models_per_page:
	            # A page smaller than requested means there is nothing left.
	            break
	    return all_models

	async def calculate_total_size(author, repo_limit=20000):
	    """Build the size report rows for every non-exl2 model of *author*.

	    Opens one `aiohttp.ClientSession`, lists the author's models, fetches
	    the size of each repository whose id does not contain "exl2"
	    (case-insensitive) concurrently, and assembles a table.

	    Args:
	        author: account name whose models are listed.
	        repo_limit: maximum number of listings to fetch.

	    Returns:
	        A list of two-element rows. Normally one `[repo_id, size]` row per
	        repository with a size greater than zero, followed by summary rows
	        (repositories fetched, total size, and a notice when fewer models
	        were fetched than the API reports). If the listing comes back
	        empty, the single row `["Error fetching models", ""]`.
	    """
	    async with aiohttp.ClientSession() as session:
	        listing = await fetch_all_models(session, author, repo_limit)
	        if not listing:
	            return [["Error fetching models", ""]]

	        total_repos_fetched = len(listing)
	        total_repos_all = await fetch_total_repo_count(session, author)

	        # Keep only repositories that are not exl2 quantisations.
	        wanted_ids = [
	            entry['modelId']
	            for entry in listing
	            if 'exl2' not in entry['modelId'].lower()
	        ]

	        # One size lookup per repository, awaited together; results come
	        # back in the same order as `wanted_ids`.
	        lookups = [get_repo_size(session, repo_id) for repo_id in wanted_ids]
	        sizes = await asyncio.gather(*lookups)

	        rows = []
	        total_size_gb = 0
	        for repo_id, size in zip(wanted_ids, sizes):
	            # Failed or empty lookups are reported as 0.0 and left out.
	            if size > 0:
	                rows.append([repo_id, size])
	                total_size_gb += size

	        rows.append(["Total Repositories Fetched", total_repos_fetched])
	        rows.append(["Total Size (non-exl2)", total_size_gb])
	        if total_repos_all > total_repos_fetched:
	            rows.append(["API Repo Limit Reached", f"Fetched {total_repos_fetched} out of {total_repos_all} available models."])
	        return rows

	async def fetch_total_repo_count(session, author):
	    """Return the model count the API advertises for *author*.

	    Issues a one-entry listing request and reads the `x-total-count`
	    response header.

	    Args:
	        session: object whose `get(url, params=...)` yields an async
	            context manager exposing `status` and `headers` (an
	            `aiohttp.ClientSession` in this file).
	        author: account name to filter on. Sent as a query parameter so it
	            is URL-encoded rather than spliced into the query string.

	    Returns:
	        The header value as an int, or 0 when the status is not 200, the
	        header is absent/empty, or its value is not an integer.
	    """
	    url = 'https://huggingface.co/api/models'
	    params = {'author': author, 'full': 'false', 'limit': '1'}
	    async with session.get(url, params=params) as response:
	        if response.status != 200:
	            print(f"Error fetching total repo count: {response.status}")
	            return 0
	        count_str = response.headers.get('x-total-count')
	    if not count_str:
	        return 0
	    try:
	        return int(count_str)
	    except ValueError:
	        # A malformed header is treated like a missing one instead of
	        # aborting the whole report.
	        print(f"Unexpected x-total-count header: {count_str!r}")
	        return 0

	# Input: a text box for the author name, pre-filled with a default value.
	_author_box = gr.Text(value="bartowski")

	# Output: a two-column table; starts with a single blank row.
	_report_table = gr.Dataframe(
	    headers=["Repository/Info", "Size (GB) / Count"],
	    value=[["", ""]],
	)

	# The handler is a plain (synchronous) callable that drives the
	# coroutine to completion with asyncio.run on every submission.
	iface = gr.Interface(
	    fn=lambda author: asyncio.run(calculate_total_size(author)),
	    inputs=_author_box,
	    outputs=_report_table,
	    title="HuggingFace User Size Calculator",
	    description="Enter a HuggingFace author to calculate the total size of their non-exl2 repositories. Limited to 20,000 Repos. Janked by Gemini-1121",
	)

	# Launched at module top level, so the app starts as soon as this file runs.
	iface.launch(height=500, width=600)