File size: 1,471 Bytes
839e452
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Importing the requirements
import torch
from transformers import AutoModel, AutoTokenizer
import spaces


# Inference runs on the GPU; the model below is moved to this device.
device = "cuda"

# Load the MiniCPM-Llama3-V-2_5 checkpoint with float16 weights and place it
# on the target device in one step. `trust_remote_code=True` lets transformers
# use the model code shipped with the checkpoint repository.
model = AutoModel.from_pretrained(
    "openbmb/MiniCPM-Llama3-V-2_5",
    trust_remote_code=True,
    torch_dtype=torch.float16,
).to(device=device)

# Switch to evaluation mode, since the model is only used for generation here.
model.eval()

# Tokenizer that matches the checkpoint loaded above.
tokenizer = AutoTokenizer.from_pretrained(
    "openbmb/MiniCPM-Llama3-V-2_5",
    trust_remote_code=True,
)


@spaces.GPU(duration=120)
def answer_question(image, question):
    """
    Answer a question about an image using the module-level model.

    The question is wrapped in a single user message and sent, together with
    the image and a fixed system prompt, to ``model.chat``. The result is
    consumed as an iterable of text chunks and concatenated into one string.

    Args:
        image: The image to analyze, forwarded unchanged to ``model.chat``.
            NOTE(review): earlier documentation described this as a file
            path (str); confirm the exact type ``model.chat`` expects.
        question (str): The question text.

    Returns:
        str: The generated answer to the question.
    """
    # Fixed instructions that frame every request to the model.
    system_prompt = "You are an AI assistant specialized in visual content analysis. Given an image and a related question, analyze the image thoroughly and provide a precise and informative answer based on the visible content. Ensure your response is clear, accurate, and directly addresses the question."

    # Single-turn conversation containing only the user's question.
    conversation = [{"role": "user", "content": question}]

    # Sampling-based generation; output is requested as a stream of chunks.
    generation_options = {
        "sampling": True,
        "temperature": 0.7,
        "stream": True,
    }

    chunks = model.chat(
        image=image,
        msgs=conversation,
        tokenizer=tokenizer,
        system_prompt=system_prompt,
        **generation_options,
    )

    # Collapse the streamed chunks into the complete answer.
    return "".join(chunks)