# Gradio demo: run an uploaded image through the OpenGVLab/InternVL2_5-1B checkpoint on CPU.
import torch
from PIL import Image
from transformers import AutoModel, CLIPImageProcessor
import gradio as gr

# Load the model with CPU-oriented settings.  The model is never moved to
# CUDA, so all inference runs on the CPU.
_cpu_load_options = {
    'torch_dtype': torch.float32,  # float32 for CPU compatibility
    'low_cpu_mem_usage': True,
    # NOTE: this allows Python code from the model repository to execute.
    'trust_remote_code': True,
    'use_flash_attn': False,  # Flash Attention disabled
}
model = AutoModel.from_pretrained(
    'OpenGVLab/InternVL2_5-1B', **_cpu_load_options
).eval()  # switch to evaluation mode right after loading

# Load the image processor from the same checkpoint as the model
image_processor = CLIPImageProcessor.from_pretrained('OpenGVLab/InternVL2_5-1B')

# Define the function to process the image and generate outputs
def process_image(image):
    """Run an uploaded image through the model and describe the result.

    Args:
        image: The PIL image delivered by the Gradio image input, or
            ``None`` when the input was left empty.

    Returns:
        A string for the output text box: ``"Output Shape: ..."`` with the
        shape of ``outputs.last_hidden_state`` on success, or
        ``"Error: ..."`` describing the failure.  Exceptions are never
        propagated, so the UI always receives text to display.
    """
    # An empty image input arrives as None; report that clearly instead of
    # surfacing an AttributeError message to the user.
    if image is None:
        return "Error: No image provided. Please upload an image."

    try:
        # Convert uploaded image to RGB
        rgb_image = image.convert('RGB')

        # Preprocess the image into a PyTorch tensor batch
        pixel_values = image_processor(images=rgb_image, return_tensors='pt').pixel_values

        # Inference only: disable autograd so no gradient graph is built
        # (eval() alone does not turn gradient tracking off).
        with torch.no_grad():
            outputs = model(pixel_values)

        # NOTE(review): assumes the model's forward accepts pixel_values
        # alone and that its output exposes `last_hidden_state` -- confirm
        # for this checkpoint.
        return f"Output Shape: {outputs.last_hidden_state.shape}"
    except Exception as e:
        # UI boundary: turn any failure into a message for the text box.
        return f"Error: {str(e)}"

# Build the web UI: one image upload as input, one text box as output.
demo = gr.Interface(
    title="InternViT Demo",
    description="Upload an image to process it using the InternViT model from OpenGVLab.",
    fn=process_image,  # called with the uploaded image on submit
    inputs=gr.Image(type="pil"),  # hands the callback a PIL image
    outputs=gr.Textbox(label="Model Output"),  # shows the returned string
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    # Listen on all network interfaces, port 7860.
    demo.launch(server_port=7860, server_name="0.0.0.0")