Ravinandan committed on
Commit
b918dff
1 Parent(s): dcfbbe4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -6
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import torch
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
- from PIL import Image
4
  import gradio as gr
5
 
6
  # Load the model and tokenizer
@@ -15,7 +14,7 @@ tokenizer = AutoTokenizer.from_pretrained("qresearch/llama-3.1-8B-vision-378", u
15
  # Define the function to process the image and instruction
16
  def describe_image(image, instruction):
17
  description = model.answer_question(
18
- image, instruction, tokenizer, max_new_tokens=128, do_sample=True, temperature=0.3
19
  )
20
  return description
21
 
@@ -24,12 +23,21 @@ interface = gr.Interface(
24
  fn=describe_image,
25
  inputs=[
26
  gr.Image(type="pil"), # Input for the image
27
- gr.Textbox(placeholder="Enter your instruction here...", label="Instruction") # Input for the instruction
 
 
 
 
 
28
  ],
29
- outputs="text", # Output is text (the description)
30
- title="LLaMA 3.1 with vision",
 
 
 
 
31
  description="Upload an image and enter an instruction to generate a description based on the provided instruction."
32
  )
33
 
34
  # Launch the Gradio app
35
- interface.launch()
 
1
  import torch
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
3
  import gradio as gr
4
 
5
  # Load the model and tokenizer
 
14
  # Define the function to process the image and instruction
15
  def describe_image(image, instruction):
16
  description = model.answer_question(
17
+ image, instruction, tokenizer, max_new_tokens=1000, do_sample=True, temperature=0.3
18
  )
19
  return description
20
 
 
23
  fn=describe_image,
24
  inputs=[
25
  gr.Image(type="pil"), # Input for the image
26
+ gr.Textbox(
27
+ placeholder="Enter your instruction here...",
28
+ label="Instruction",
29
+ lines=10, # Increase number of lines for instruction input
30
+ max_lines=20 # Maximum number of lines for scrolling
31
+ )
32
  ],
33
+ outputs=gr.Textbox(
34
+ label="Description",
35
+ lines=10, # Increase number of lines for output
36
+ max_lines=30 # Maximum number of lines for scrolling
37
+ ),
38
+ title="LLaMA 3.1 with Vision",
39
  description="Upload an image and enter an instruction to generate a description based on the provided instruction."
40
  )
41
 
42
  # Launch the Gradio app
43
+ interface.launch()