import gradio as gr
from transformers import AutoModel, AutoConfig
import torch
import json
from collections import defaultdict, OrderedDict


def analyze_model_parameters(model_path, hf_token=None, show_layer_details=False):
    """Load a HuggingFace model on CPU and return a formatted parameter report.

    Counts total / trainable / embedding parameters, deduplicating tied
    weights (parameters sharing the same underlying storage are counted
    once), and groups parameters by a heuristic layer category.

    Args:
        model_path: HuggingFace Hub id or local path of the model.
        hf_token: Optional HuggingFace access token for private/gated models.
        show_layer_details: When True, append a parameter-by-parameter
            breakdown for each layer category.

    Returns:
        A markdown-formatted report string, or a human-readable error
        message if loading fails.
    """
    try:
        # Only forward the token when one was actually supplied.
        token_kwargs = {}
        if hf_token and hf_token.strip():
            token_kwargs['token'] = hf_token.strip()

        # Load model configuration first
        config = AutoConfig.from_pretrained(model_path, trust_remote_code=True, **token_kwargs)

        # Load model on CPU
        model = AutoModel.from_pretrained(model_path, device_map="cpu", trust_remote_code=True, **token_kwargs)

        # Counters over *unique* storages only (tied weights counted once).
        total_params = 0
        trainable_params = 0
        embedding_params = 0
        non_embedding_params = 0

        # data_ptr -> info; used to detect weight tying (shared storage).
        unique_params = {}
        param_details = []
        layer_breakdown = defaultdict(lambda: {'total': 0, 'trainable': 0, 'params': []})

        # Common substrings that identify embedding parameters by name.
        embedding_patterns = [
            'embeddings', 'embed', 'wte', 'wpe', 'word_embedding',
            'position_embedding', 'token_embedding', 'embed_tokens',
            'embed_positions', 'embed_layer_norm'
        ]

        def is_embedding_param(name):
            # Name-based heuristic: any embedding pattern substring matches.
            name_lower = name.lower()
            return any(pattern in name_lower for pattern in embedding_patterns)

        def get_layer_name(param_name):
            """Extract layer information from parameter name"""
            parts = param_name.split('.')
            if len(parts) >= 2:
                # Handle common transformer architectures: "...layers.<n>..."
                if 'layer' in parts or 'layers' in parts:
                    for i, part in enumerate(parts):
                        if part in ['layer', 'layers'] and i + 1 < len(parts):
                            try:
                                layer_num = int(parts[i + 1])
                                return f"Layer {layer_num}"
                            except ValueError:
                                pass
                # Handle other patterns
                if 'encoder' in parts:
                    return "Encoder"
                elif 'decoder' in parts:
                    return "Decoder"
                elif any(emb in param_name.lower() for emb in embedding_patterns):
                    return "Embeddings"
                elif 'classifier' in param_name.lower() or 'head' in param_name.lower():
                    return "Classification Head"
                elif 'pooler' in param_name.lower():
                    return "Pooler"
                elif 'ln' in param_name.lower() or 'norm' in param_name.lower():
                    return "Layer Norm"
            return "Other"

        # Analyze all parameters
        for name, param in model.named_parameters():
            param_size = param.numel()
            is_trainable = param.requires_grad
            is_embedding = is_embedding_param(name)
            layer_name = get_layer_name(name)

            # Weight tying: parameters sharing a storage pointer are the
            # same tensor exposed under two names. Check BEFORE inserting
            # so `is_shared` is meaningful (O(1) dict lookup instead of
            # rescanning param_details each iteration).
            ptr = param.data_ptr()
            is_shared = ptr in unique_params
            if not is_shared:
                unique_params[ptr] = {
                    'name': name,
                    'size': param_size,
                    'trainable': is_trainable,
                    'embedding': is_embedding,
                    'layer': layer_name,
                    'shape': list(param.shape)
                }

                # Count each unique storage exactly once in the totals.
                total_params += param_size
                if is_trainable:
                    trainable_params += param_size
                if is_embedding:
                    embedding_params += param_size
                else:
                    non_embedding_params += param_size

                layer_breakdown[layer_name]['total'] += param_size
                if is_trainable:
                    layer_breakdown[layer_name]['trainable'] += param_size

            # Per-name details (tied aliases appear here with shared=True).
            param_details.append({
                'name': name,
                'shape': list(param.shape),
                'size': param_size,
                'trainable': is_trainable,
                'embedding': is_embedding,
                'layer': layer_name,
                'shared': is_shared,
                'ptr': ptr
            })

            layer_breakdown[layer_name]['params'].append({
                'name': name,
                'shape': list(param.shape),
                'size': param_size,
                'trainable': is_trainable
            })

        # Guard against ZeroDivisionError for a (degenerate) parameterless model.
        if total_params == 0:
            return f"āŒ **Error loading model:** Model '{model_path}' reports no parameters."

        # Format the summary
        summary = f"""
šŸ” **MODEL ANALYSIS: {model_path}**

šŸ“Š **PARAMETER SUMMARY**
ā”œā”€ā”€ Total Parameters: {total_params:,}
ā”œā”€ā”€ Trainable Parameters: {trainable_params:,}
ā”œā”€ā”€ Non-trainable Parameters: {total_params - trainable_params:,}
└── Trainable Percentage: {(trainable_params/total_params*100):.1f}%

🧠 **PARAMETER BREAKDOWN**
ā”œā”€ā”€ Embedding Parameters: {embedding_params:,} ({embedding_params/total_params*100:.1f}%)
└── Non-embedding Parameters: {non_embedding_params:,} ({non_embedding_params/total_params*100:.1f}%)

šŸ“‹ **MODEL INFO**
ā”œā”€ā”€ Model Type: {config.model_type if hasattr(config, 'model_type') else 'Unknown'}
ā”œā”€ā”€ Architecture: {config.architectures[0] if hasattr(config, 'architectures') and config.architectures else 'Unknown'}
└── Hidden Size: {getattr(config, 'hidden_size', 'Unknown')}
"""

        # Defined unconditionally so the detailed section below can never
        # hit a NameError when layer_breakdown happens to be empty.
        sorted_layers = []
        if layer_breakdown:
            summary += "\nšŸ—ļø **LAYER BREAKDOWN SUMMARY**\n"
            # Stable display order: embeddings, numbered layers, then heads.
            sorted_layers = sorted(layer_breakdown.items(), key=lambda x: (
                0 if x[0] == "Embeddings" else
                1 if x[0].startswith("Layer") else
                2 if x[0] == "Layer Norm" else
                3 if x[0] == "Pooler" else
                4 if x[0] == "Classification Head" else
                5
            ))

            for layer_name, info in sorted_layers:
                percentage = info['total'] / total_params * 100
                summary += f"ā”œā”€ā”€ {layer_name}: {info['total']:,} params ({percentage:.1f}%)\n"

        # Detailed layer breakdown if requested
        layer_details = ""
        if show_layer_details:
            layer_details = "\n" + "="*60 + "\n"
            layer_details += "šŸ” **DETAILED LAYER-BY-LAYER BREAKDOWN**\n"
            layer_details += "="*60 + "\n"

            for layer_name, info in sorted_layers:
                layer_details += f"\nšŸ“ **{layer_name.upper()}**\n"
                layer_details += f" Total: {info['total']:,} | Trainable: {info['trainable']:,}\n"
                layer_details += f" Parameters:\n"
                for param_info in info['params']:
                    trainable_mark = "āœ“" if param_info['trainable'] else "āœ—"
                    layer_details += f" {trainable_mark} {param_info['name']}: {param_info['shape']} → {param_info['size']:,}\n"

        return summary + layer_details

    except Exception as e:
        # Map the most common Hub failures to actionable messages.
        error_msg = str(e)
        if "401" in error_msg or "authentication" in error_msg.lower():
            return f"šŸ”’ **Authentication Error:** This model requires a valid HuggingFace token.\n\nPlease provide your HuggingFace token in the token field above.\n\nOriginal error: {error_msg}"
        elif "404" in error_msg or "not found" in error_msg.lower():
            return f"šŸ” **Model Not Found:** The model '{model_path}' was not found.\n\nPlease check:\n- Model path is correct\n- Model exists on HuggingFace Hub\n- You have access to the model (use token if private)\n\nOriginal error: {error_msg}"
        else:
            return f"āŒ **Error loading model:** {error_msg}\n\nPlease check that the model path is correct and accessible."


def count_parameters_basic(model_path, hf_token=None):
    """Basic parameter counting without layer details"""
    return analyze_model_parameters(model_path, hf_token, show_layer_details=False)


def count_parameters_detailed(model_path, hf_token=None):
    """Detailed parameter counting with layer-by-layer breakdown"""
    return analyze_model_parameters(model_path, hf_token, show_layer_details=True)


# Create Gradio interface with multiple outputs
with gr.Blocks(title="šŸ¤— Advanced HuggingFace Model Parameter Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# šŸ¤— Advanced HuggingFace Model Parameter Analyzer

Enter any HuggingFace model path to get detailed parameter analysis including:
- **Total & trainable parameter counts**
- **Embedding vs non-embedding breakdown**
- **Layer-by-layer analysis**
- **Weight sharing detection**
- **Private model access** with HuggingFace token
""")

    with gr.Row():
        with gr.Column(scale=2):
            model_input = gr.Textbox(
                label="šŸ” HuggingFace Model Path",
                placeholder="e.g., bert-base-uncased, gpt2, microsoft/DialoGPT-medium",
                value="bert-base-uncased"
            )
        with gr.Column(scale=1):
            hf_token_input = gr.Textbox(
                label="šŸ”‘ HuggingFace Token (Optional)",
                placeholder="hf_...",
                type="password",
                info="Required for private models or gated models"
            )

    with gr.Row():
        analyze_btn = gr.Button("šŸ“Š Analyze Model", variant="primary")
        detailed_btn = gr.Button("šŸ” Detailed Analysis", variant="secondary")

    output_text = gr.Textbox(
        label="šŸ“‹ Analysis Results",
        lines=20,
        max_lines=50,
        show_copy_button=True
    )

    # Event handlers
    analyze_btn.click(
        fn=count_parameters_basic,
        inputs=[model_input, hf_token_input],
        outputs=output_text
    )

    detailed_btn.click(
        fn=count_parameters_detailed,
        inputs=[model_input, hf_token_input],
        outputs=output_text
    )

    # Example models
    gr.Examples(
        examples=[
            ["bert-base-uncased"],
            ["gpt2"],
            ["roberta-base"],
            ["distilbert-base-uncased"],
            ["microsoft/DialoGPT-medium"],
            ["facebook/bart-base"],
            ["t5-small"],
            ["google/flan-t5-small"]
        ],
        inputs=model_input,
        label="šŸŽÆ Example Models"
    )

    gr.Markdown("""
### šŸ“ Notes:
- **Weight tying detection**: Automatically handles shared parameters (e.g., input/output embeddings)
- **Layer categorization**: Groups parameters by transformer layers, embeddings, etc.
- **Detailed analysis**: Click "Detailed Analysis" for parameter-by-parameter breakdown
- **Private models**: Use your HuggingFace token to access private or gated models
- **Token security**: Token is only used for this session and not stored
- **Model compatibility**: Works with most HuggingFace transformer models
""")


if __name__ == "__main__":
    demo.launch()