guneetsk99 committed
Commit 3d6b1e3 · verified · 1 Parent(s): 7a6263a

Update app.py

Files changed (1)
  1. app.py +16 -11
app.py CHANGED
@@ -1,33 +1,38 @@
 import gradio as gr
 from transformers import AutoProcessor, AutoModelForImageTextToText
+import torch
 from PIL import Image
 
-# Load model and processor
+# Load the processor and model
 processor = AutoProcessor.from_pretrained("guneetsk99/finance_qwen_VL_7B")
 model = AutoModelForImageTextToText.from_pretrained("guneetsk99/finance_qwen_VL_7B")
 
-def predict(input_img):
-    # Preprocess the image
-    inputs = processor(images=input_img, return_tensors="pt")
+def predict(input_img, text_prompt):
+    # Preprocess the image and text prompt
+    inputs = processor(images=input_img, text=text_prompt, return_tensors="pt").to(model.device)
 
     # Generate predictions using the model
-    outputs = model.generate(**inputs)
+    with torch.no_grad():
+        outputs = model.generate(**inputs, max_new_tokens=50)
 
     # Decode the generated text
     generated_text = processor.decode(outputs[0], skip_special_tokens=True)
 
-    # Return the input image and the generated text
-    return input_img, {"Prediction": generated_text}
+    return input_img, generated_text
 
 # Create the Gradio interface
 gradio_app = gr.Interface(
-    predict,
-    inputs=gr.Image(label="Upload Image", source="upload", type="pil"),
+    fn=predict,
+    inputs=[
+        gr.Image(label="Upload Image", source="upload", type="pil"),
+        gr.Textbox(label="Text Prompt", placeholder="Enter a text prompt, e.g., 'Describe this image.'"),
+    ],
     outputs=[
         gr.Image(label="Uploaded Image"),
-        gr.Label(label="Generated Text"),
+        gr.Textbox(label="Generated Response"),
     ],
-    title="Image to Text Model",
+    title="Finance Image-to-Text Model",
+    description="Upload a financial document image and provide a text prompt for the model to process the image and generate a text response.",
 )
 
 if __name__ == "__main__":
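
For reference, the sketch below exercises the updated two-argument predict() outside the Space. It is only a sketch: the bfloat16 / device_map="auto" loading options and the "sample_invoice.png" test image are illustrative assumptions and are not part of this commit, which loads the model with the defaults shown in the diff.

# Local smoke test for the updated predict() -- a sketch, not part of the commit.
# Assumptions: transformers, torch, and Pillow are installed;
# "sample_invoice.png" and the question below are hypothetical test inputs.
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("guneetsk99/finance_qwen_VL_7B")
model = AutoModelForImageTextToText.from_pretrained(
    "guneetsk99/finance_qwen_VL_7B",
    torch_dtype=torch.bfloat16,   # optional: smaller memory footprint than the fp32 default
    device_map="auto",            # optional: place the 7B weights on a GPU when one is available
)

def predict(input_img, text_prompt):
    # Mirrors the committed function: move inputs to the model's device before generating
    inputs = processor(images=input_img, text=text_prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=50)
    return input_img, processor.decode(outputs[0], skip_special_tokens=True)

if __name__ == "__main__":
    image = Image.open("sample_invoice.png")               # hypothetical local file
    _, answer = predict(image, "What is the invoice total?")
    print(answer)

Moving the processor output to model.device, as this commit now does, avoids device-mismatch errors when the Space runs on GPU hardware; the max_new_tokens=50 cap it adds limits response length, so longer answers would need a larger budget.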