Spaces:

gauri-sharan
/

test-two

Sleeping

gauri-sharan committed on Sep 29, 2024

Commit

3ef82d2

verified ·

1 Parent(s): e016842

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,16 +6,20 @@ import torch
 from PIL import Image
 import os
 import traceback
-import spaces  # Ensure import for GPU management
-# Load the Byaldi and Qwen2-VL models without using .cuda()
-rag_model = RAGMultiModalModel.from_pretrained("vidore/colpali")
 qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
-    "Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True, torch_dtype=torch.bfloat16
-)
 # Processor for Qwen2-VL
-processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True)
 @spaces.GPU  # Decorate the function for GPU management
 def ocr_and_extract(image, text_query):
@@ -52,16 +56,13 @@ def ocr_and_extract(image, text_query):
         text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         image_inputs, _ = process_vision_info(messages)
         inputs = processor(
             text=[text_input],
             images=image_inputs,
             padding=True,
             return_tensors="pt",
-        )
-        # Move the Qwen2-VL model and inputs to GPU
-        qwen_model.to("cuda")
-        inputs = {k: v.to("cuda") for k, v in inputs.items()}
         # Generate the output with Qwen2-VL
         generated_ids = qwen_model.generate(**inputs, max_new_tokens=50)
@@ -92,4 +93,4 @@ iface = gr.Interface(
 )
 # Launch the Gradio app
-iface.launch()

 from PIL import Image
 import os
 import traceback
+import spaces
+# Check if CUDA is available
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
+# Load the Byaldi and Qwen2-VL models
+rag_model = RAGMultiModalModel.from_pretrained("vidore/colpali").to(device)  # Move Byaldi to GPU
 qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
+    "Qwen/Qwen2-VL-7B-Instruct", trust_remote_code=True, torch_dtype=torch.bfloat16
+).to(device)  # Move Qwen2-VL to GPU
 # Processor for Qwen2-VL
+processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", trust_remote_code=True)
 @spaces.GPU  # Decorate the function for GPU management
 def ocr_and_extract(image, text_query):
         text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         image_inputs, _ = process_vision_info(messages)
+        # Move the processor outputs (text and image tensors) to the selected device (CUDA if available, otherwise CPU)
         inputs = processor(
             text=[text_input],
             images=image_inputs,
             padding=True,
             return_tensors="pt",
+        ).to(device)
         # Generate the output with Qwen2-VL
         generated_ids = qwen_model.generate(**inputs, max_new_tokens=50)
 )
 # Launch the Gradio app
+iface.launch()