Spaces:
Sleeping
Sleeping
intuitive262
committed on
Commit
·
d529377
1
Parent(s):
1fb71cb
Updated code files
Browse files
app.py
CHANGED
@@ -9,7 +9,7 @@ import re
|
|
9 |
rag = RAGMultiModalModel.from_pretrained("vidore/colpali")
|
10 |
vlm = Qwen2VLForConditionalGeneration.from_pretrained(
|
11 |
"Qwen/Qwen2-VL-2B-Instruct",
|
12 |
-
torch_dtype=torch.
|
13 |
trust_remote_code=True,
|
14 |
device_map="auto",
|
15 |
)
|
@@ -32,9 +32,9 @@ def extract_text(image, query):
|
|
32 |
image_inputs, video_inputs = process_vision_info(messages)
|
33 |
inputs = processor(text=[text], images=image_inputs, videos=video_inputs, padding=True, return_tensors="pt")
|
34 |
inputs = inputs.to("cpu")
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
return processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
|
39 |
|
40 |
def search_text(text, query):
|
|
|
9 |
rag = RAGMultiModalModel.from_pretrained("vidore/colpali")
|
10 |
vlm = Qwen2VLForConditionalGeneration.from_pretrained(
|
11 |
"Qwen/Qwen2-VL-2B-Instruct",
|
12 |
+
torch_dtype=torch.float32,
|
13 |
trust_remote_code=True,
|
14 |
device_map="auto",
|
15 |
)
|
|
|
32 |
image_inputs, video_inputs = process_vision_info(messages)
|
33 |
inputs = processor(text=[text], images=image_inputs, videos=video_inputs, padding=True, return_tensors="pt")
|
34 |
inputs = inputs.to("cpu")
|
35 |
+
with torch.no_grad():
|
36 |
+
generated_ids = vlm.generate(**inputs, max_new_tokens=200, temperature=0.7, top_p=0.9)
|
37 |
+
generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
|
38 |
return processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
|
39 |
|
40 |
def search_text(text, query):
|