aekpic877 committed on
Commit 8dd40fa
1 Parent(s): 934f8a8
Files changed (1)
  1. app.py +42 -30
app.py CHANGED
@@ -1,40 +1,52 @@
- # test.py
  import torch
  from PIL import Image
  from transformers import AutoModel, AutoTokenizer

- model = AutoModel.from_pretrained('openbmb/MiniCPM-Llama3-V-2_5', trust_remote_code=True, torch_dtype=torch.float16)
- model = model.to(device='cuda')

- tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-Llama3-V-2_5', trust_remote_code=True)
- model.eval()

- image = Image.open('xx.jpg').convert('RGB')
- question = 'What is in the image?'
- msgs = [{'role': 'user', 'content': question}]

- res = model.chat(
-     image=image,
-     msgs=msgs,
-     tokenizer=tokenizer,
-     sampling=True,  # if sampling=False, beam_search will be used by default
-     temperature=0.7,
-     # system_prompt=''  # pass system_prompt if needed
- )
- print(res)

- ## if you want to use streaming, please make sure sampling=True and stream=True
- ## the model.chat will return a generator
- res = model.chat(
-     image=image,
-     msgs=msgs,
-     tokenizer=tokenizer,
-     sampling=True,
-     temperature=0.7,
-     stream=True
  )

- generated_text = ""
- for new_text in res:
-     generated_text += new_text
-     print(new_text, flush=True, end='')
 
  import torch
  from PIL import Image
  from transformers import AutoModel, AutoTokenizer
+ import gradio as gr

+ # Load a smaller model and tokenizer
+ # Note: model.chat() below requires a chat-capable multimodal checkpoint (e.g. the
+ # MiniCPM-V family); a plain ViT image model ships neither a tokenizer nor a chat() method.
+ model_name = 'google/vit-base-patch16-224'  # Example of a smaller model, adjust as needed

+ try:
+     model = AutoModel.from_pretrained(model_name, torch_dtype=torch.float16)
+     model = model.to(device='cuda' if torch.cuda.is_available() else 'cpu')
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     model.eval()
+ except Exception as e:
+     print(f"Error loading model or tokenizer: {e}")
+     exit()

+ def process_image(image, question):
+     # Convert the Gradio image (numpy array) to a PIL Image
+     image = Image.fromarray(image).convert('RGB')
+
+     # Create the message list
+     msgs = [{'role': 'user', 'content': question}]
+
+     # Perform inference
+     try:
+         with torch.no_grad():
+             res = model.chat(
+                 image=image,
+                 msgs=msgs,
+                 tokenizer=tokenizer,
+                 sampling=True,  # if sampling=False, beam_search will be used by default
+                 temperature=0.7,
+                 stream=False  # Set to False for non-streaming output
+             )
+         return res
+     except Exception as e:
+         return f"Error during model inference: {e}"

+ # Define the Gradio interface (gr.Image/gr.Textbox replace the deprecated gr.inputs.* API)
+ interface = gr.Interface(
+     fn=process_image,
+     inputs=[gr.Image(type='numpy'), gr.Textbox(label="Question")],
+     outputs="text",
+     title="Image Question Answering",
+     description="Upload an image and ask a question about it. The model will provide an answer."
  )

+ # Launch the Gradio app
+ interface.launch()
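
The removed script also demonstrated streaming (model.chat with sampling=True and stream=True returns a generator of text chunks). For reference, a minimal sketch of how that streaming mode could be exposed through the same Gradio interface, assuming a chat-capable checkpoint such as the original openbmb/MiniCPM-Llama3-V-2_5 and enough GPU memory; the function name answer_stream and the UI labels are illustrative, not part of this commit:

import torch
import gradio as gr
from PIL import Image
from transformers import AutoModel, AutoTokenizer

# Chat-capable checkpoint from the removed script (requires trust_remote_code for model.chat)
model_name = 'openbmb/MiniCPM-Llama3-V-2_5'
model = AutoModel.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float16)
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model.eval()

def answer_stream(image, question):
    # model.chat expects a PIL image; Gradio passes a numpy array.
    pil_image = Image.fromarray(image).convert('RGB')
    msgs = [{'role': 'user', 'content': question}]
    # With sampling=True and stream=True, model.chat returns a generator of text chunks.
    chunks = model.chat(
        image=pil_image,
        msgs=msgs,
        tokenizer=tokenizer,
        sampling=True,
        temperature=0.7,
        stream=True
    )
    generated_text = ""
    for new_text in chunks:
        generated_text += new_text
        yield generated_text  # Gradio re-renders the output on each yield, so the answer streams in.

demo = gr.Interface(
    fn=answer_stream,
    inputs=[gr.Image(type='numpy'), gr.Textbox(label="Question")],
    outputs="text",
    title="Image Question Answering (streaming)",
)
demo.launch()

Because the handler is a generator, gr.Interface updates the text output incrementally instead of waiting for the full answer, mirroring the print-as-you-go loop in the removed test.py.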