Update app.py
app.py
CHANGED
@@ -1,18 +1,15 @@
 import os
 import torch
 import spaces
-import subprocess
 import gradio as gr
 from huggingface_hub import login
 from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
 
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 login(os.environ.get("HF_TOKEN"))
 
 model_id = "google/paligemma-3b-mix-448"
 model = PaliGemmaForConditionalGeneration.from_pretrained(
     model_id, device_map={"": 0},
-    attn_implementation="flash_attention_2",
     torch_dtype=torch.bfloat16,
 )
 processor = AutoProcessor.from_pretrained(model_id)
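For reference, a minimal sketch of how the model and processor loaded above are typically driven for inference. The image URL, prompt, and max_new_tokens value here are illustrative assumptions, not part of this commit; this follows the standard transformers PaliGemma generate pattern.

import requests
import torch
from PIL import Image

# Hypothetical example inputs -- not part of the committed app.py.
url = "https://example.com/cat.jpg"  # assumed placeholder image URL
image = Image.open(requests.get(url, stream=True).raw)
prompt = "caption en"  # mix checkpoints expect a task-prefix prompt

# Tokenize the prompt and preprocess the image in one call,
# then move the batch to the same device as the model.
inputs = processor(text=prompt, images=image, return_tensors="pt").to(model.device)
input_len = inputs["input_ids"].shape[-1]

with torch.inference_mode():
    output_ids = model.generate(**inputs, max_new_tokens=64, do_sample=False)

# generate() echoes the prompt tokens; slice them off before decoding.
print(processor.decode(output_ids[0][input_len:], skip_special_tokens=True))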