# Spaces: Build error
import os | |
import pickle | |
from datetime import datetime | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
from huggingface_hub import HfApi | |
# Fill colour used for each language's band in the stacked area plot,
# keyed by the same language names used for the filtered model lists.
LANGUAGE_COLORS = {
    "english": "orange",
    "spanish": "blue",
}
def _single_language(tags):
    """Return the language code if *tags* names exactly one language, else None."""
    prefix = "language:"
    # `tags` may be None on a model entry, so fall back to an empty tuple.
    codes = {tag[len(prefix):] for tag in tags or () if tag.startswith(prefix)}
    return next(iter(codes)) if len(codes) == 1 else None


def fetch_models(cache_file="models_cache.pkl"):
    """Fetch English-only and Spanish-only models from the Hugging Face Hub.

    A model counts as English (Spanish) when its only ``language:*`` tag is
    ``language:en`` (``language:es``). Results are cached in *cache_file* and
    reused while the file is less than 24 hours old.

    Args:
        cache_file: Path of the pickle file used as the on-disk cache.

    Returns:
        A dict with the keys ``"english"`` and ``"spanish"``, each mapping to
        the list of matching model objects (or whatever the cache contains
        when a fresh cache file is loaded).
    """
    if os.path.exists(cache_file):
        cache_age = datetime.now().timestamp() - os.path.getmtime(cache_file)
        if cache_age < 24 * 3600:  # 24 hours in seconds
            print("Loading models from cache...")
            try:
                with open(cache_file, "rb") as f:
                    # NOTE(security): unpickling can execute arbitrary code;
                    # only point cache_file at files this script wrote itself.
                    return pickle.load(f)
            except (
                pickle.UnpicklingError,
                AttributeError,
                EOFError,
                ImportError,
                IndexError,
            ) as exc:
                # A truncated or incompatible cache should not abort the run;
                # fall through and rebuild it from the Hub.
                print(f"Cache could not be read ({exc}), fetching fresh data...")
        else:
            print("Cache is older than 24 hours, fetching fresh data...")
    else:
        print("No cache found, fetching models from Hugging Face Hub...")

    hf_api = HfApi()
    bucket_for_code = {"en": "english", "es": "spanish"}
    filtered_models = {"english": [], "spanish": []}
    # Stream the listing once instead of materialising every model and
    # scanning the full list separately for each language.
    for model in hf_api.list_models(full=True):
        bucket = bucket_for_code.get(_single_language(model.tags))
        if bucket is not None:
            filtered_models[bucket].append(model)

    # Cache the filtered models
    print("Saving models to cache...")
    with open(cache_file, "wb") as f:
        pickle.dump(filtered_models, f)
    return filtered_models
def create_stack_area_plot(models, output_dir):
    """Save a stacked area plot of cumulative English and Spanish model counts.

    Models are bucketed by the calendar month of their ``created_at`` value,
    accumulated over time, and written to
    ``<output_dir>/models_stack_area_en_es.png``.

    Args:
        models: Mapping with the keys ``"english"`` and ``"spanish"``; each
            value is an iterable of objects exposing a ``created_at``
            datetime attribute.
        output_dir: Existing directory in which the PNG file is written.

    Returns:
        None. Nothing is plotted when no model has a creation date.
    """
    languages = ["english", "spanish"]

    # Collect creation dates once per language. Entries without a creation
    # timestamp are skipped rather than crashing on `None.date()`.
    dates_by_lang = {
        lang: [m.created_at.date() for m in models[lang] if m.created_at is not None]
        for lang in languages
    }
    all_dates = [d for lang in languages for d in dates_by_lang[lang]]
    if not all_dates:
        print("No models found for any language. Skipping plot creation.")
        return

    # The monthly grouping below labels each bucket by the first day of its
    # month, so the shared axis has to start on the first day of the earliest
    # month. Starting at the raw minimum date would make `date_range` begin
    # at the following month and silently drop the earliest bucket.
    first_month = min(all_dates).replace(day=1)
    date_range = pd.date_range(start=first_month, end=max(all_dates), freq="MS")

    # Build one cumulative monthly series per language on the shared axis.
    dfs = {}
    for lang in languages:
        df = pd.DataFrame({"Date": dates_by_lang[lang]})
        df["Count"] = 1
        df["Date"] = pd.to_datetime(df["Date"])
        monthly = df.groupby(pd.Grouper(key="Date", freq="MS")).sum()
        # Months with no new models count as zero before accumulating.
        monthly = monthly.reindex(date_range, fill_value=0)
        dfs[lang] = monthly.cumsum()

    # Plot stacked area for English and Spanish
    plt.figure(figsize=(10, 6))
    plt.stackplot(
        date_range,
        [dfs[lang]["Count"].values for lang in languages],
        labels=[lang.capitalize() for lang in languages],
        colors=[LANGUAGE_COLORS[lang] for lang in languages],
    )
    plt.xlabel("Date", fontsize=10)
    plt.ylabel("Cumulative Number of Models", fontsize=10)
    plt.xticks(rotation=45, fontsize=10)
    plt.legend(loc="upper left")
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "models_stack_area_en_es.png"))
    plt.close()
def main():
    """Run the pipeline: fetch models, report counts, and draw the plot."""
    # All generated images are written under this folder.
    output_dir = "plots"
    os.makedirs(output_dir, exist_ok=True)

    # Load the per-language model lists (from cache or from the Hub).
    print("Fetching models from Hugging Face Hub...")
    models = fetch_models()

    # Report how many models were kept for each language.
    print("\nModel counts:")
    for language in models:
        total = len(models[language])
        print(f"{language.capitalize()}: {total}")

    # Render the stacked area chart.
    print("\nCreating stack area plot...")
    create_stack_area_plot(models, output_dir)
    print(f"Plot has been saved to the '{output_dir}' directory")
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()