Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -11,7 +11,6 @@ from pdf2image import convert_from_path
|
|
11 |
import torch
|
12 |
from torch.utils.data import DataLoader
|
13 |
from transformers.utils.import_utils import is_flash_attn_2_available
|
14 |
-
# from colpali_engine.models import ColQwen2, ColQwen2Processor
|
15 |
from colpali_engine.models import ColQwen2_5, ColQwen2_5_Processor
|
16 |
from openai import OpenAI
|
17 |
import spaces
|
@@ -21,12 +20,11 @@ import gradio as gr
|
|
21 |
# Enable flash attention
|
22 |
# subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
23 |
|
24 |
-
# Load the visual document retrieval model
|
25 |
model = ColQwen2_5.from_pretrained(
|
26 |
"vidore/colqwen2.5-v0.2",
|
27 |
torch_dtype=torch.bfloat16,
|
28 |
device_map="cuda:0",
|
29 |
-
# attn_implementation="flash_attention_2",
|
30 |
attn_implementation="flash_attention_2" if is_flash_attn_2_available() else None,
|
31 |
).eval()
|
32 |
processor = ColQwen2_5_Processor.from_pretrained("vidore/colqwen2.5-v0.2")
|
@@ -59,7 +57,7 @@ def convert_files(files):
|
|
59 |
################################################
|
60 |
@spaces.GPU
|
61 |
def index_gpu(images, ds):
|
62 |
-
"""Runs inference on the GPU for the given images with
|
63 |
# Specify the device
|
64 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
65 |
if device != model.device:
|
|
|
11 |
import torch
|
12 |
from torch.utils.data import DataLoader
|
13 |
from transformers.utils.import_utils import is_flash_attn_2_available
|
|
|
14 |
from colpali_engine.models import ColQwen2_5, ColQwen2_5_Processor
|
15 |
from openai import OpenAI
|
16 |
import spaces
|
|
|
20 |
# Enable flash attention
|
21 |
# subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
22 |
|
23 |
+
# Load the visual document retrieval model
|
24 |
model = ColQwen2_5.from_pretrained(
|
25 |
"vidore/colqwen2.5-v0.2",
|
26 |
torch_dtype=torch.bfloat16,
|
27 |
device_map="cuda:0",
|
|
|
28 |
attn_implementation="flash_attention_2" if is_flash_attn_2_available() else None,
|
29 |
).eval()
|
30 |
processor = ColQwen2_5_Processor.from_pretrained("vidore/colqwen2.5-v0.2")
|
|
|
57 |
################################################
|
58 |
@spaces.GPU
|
59 |
def index_gpu(images, ds):
|
60 |
+
"""Runs inference on the GPU for the given images with the visual document retrieval model."""
|
61 |
# Specify the device
|
62 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
63 |
if device != model.device:
|