import os
import subprocess

import requests
import torch
import torchvision
import spaces
from huggingface_hub import login
from transformers import (
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    Idefics2ForConditionalGeneration,
    LlavaForConditionalGeneration,
    LlavaNextForConditionalGeneration,
    PaliGemmaForConditionalGeneration,
)
# Install flash-attn; FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE skips the CUDA
# compile step so the prebuilt wheel is used instead
def install_flash_attn():
    subprocess.run(
        "pip install flash-attn --no-build-isolation",
        env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
        shell=True,
        check=True,
    )
# Authenticate with Hugging Face
def authenticate_hf(token):
    login(token=token, add_to_git_credential=True)
# Cache of model name -> printed module tree, so repeat lookups skip loading
model_cache = {}

# Function to get the model summary
def get_model_summary(model_name):
    if model_name in model_cache:
        return model_cache[model_name], ""

    try:
        # Fetch the model's config.json to learn its architecture class
        config_url = f"https://huggingface.co/{model_name}/raw/main/config.json"
        headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"}
        response = requests.get(config_url, headers=headers)
        response.raise_for_status()
        config = response.json()
        architecture = config["architectures"][0]

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Heuristic: treat models with "quantized" in their name as 4-bit
        is_quantized = "quantized" in model_name.lower()

        # Set up BitsAndBytesConfig if the model is quantized
        bnb_config = BitsAndBytesConfig(load_in_4bit=True) if is_quantized else None
        # Load the model with the class matching its declared architecture,
        # falling back to AutoModelForCausalLM for anything unrecognized
        architecture_classes = {
            "LlavaNextForConditionalGeneration": LlavaNextForConditionalGeneration,
            "LlavaForConditionalGeneration": LlavaForConditionalGeneration,
            "PaliGemmaForConditionalGeneration": PaliGemmaForConditionalGeneration,
            "Idefics2ForConditionalGeneration": Idefics2ForConditionalGeneration,
        }
        model_class = architecture_classes.get(architecture, AutoModelForCausalLM)
        model = model_class.from_pretrained(
            model_name, quantization_config=bnb_config, trust_remote_code=True
        )
        # Move to device only if the model is not quantized
        # (bitsandbytes places 4-bit weights itself)
        if not is_quantized:
            model = model.to(device)

        model_summary = str(model)
        model_cache[model_name] = model_summary
        return model_summary, ""
    except Exception as e:
        return "", str(e)
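
# --- Example usage ---
# A minimal sketch of how these helpers fit together; the model name below is
# only an illustration, and in the actual Space these functions are presumably
# wired to a UI rather than invoked from __main__.
if __name__ == "__main__":
    install_flash_attn()
    token = os.getenv("HF_TOKEN")
    if token:
        authenticate_hf(token)
    summary, error = get_model_summary("llava-hf/llava-v1.6-mistral-7b-hf")
    print(summary if not error else f"Failed to load model: {error}")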