Spaces:

Ravinandan
/

llama_3.1_with_vision

Runtime error

App Files Community

Ravinandan committed on Sep 4

Commit

dcfbbe4

•

1 Parent(s): 0823dd5

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -21

app.py CHANGED Viewed

@@ -1,11 +1,8 @@
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
-# Set environment variable for PyTorch CUDA memory management
-import os
-os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
 # Load the model and tokenizer
 model = AutoModelForCausalLM.from_pretrained(
  "qresearch/llama-3.1-8B-vision-378",
@@ -17,21 +14,9 @@ tokenizer = AutoTokenizer.from_pretrained("qresearch/llama-3.1-8B-vision-378", u
 # Define the function to process the image and instruction
 def describe_image(image, instruction):
- # Convert the image to text representation (assuming image processing is handled)
- inputs = tokenizer(instruction, return_tensors="pt").to("cuda")
- # Generate a description using the model
- with torch.no_grad(): # Avoid storing gradients to save memory
- outputs = model.generate(
- **inputs,
- max_new_tokens=128,
- do_sample=True,
- temperature=0.3
- )
- # Decode the generated tokens to a string
- description = tokenizer.decode(outputs[0], skip_special_tokens=True)
  return description
 # Create the Gradio interface
@@ -42,9 +27,9 @@ interface = gr.Interface(
  gr.Textbox(placeholder="Enter your instruction here...", label="Instruction") # Input for the instruction
  ],
  outputs="text", # Output is text (the description)
- title="LLaMA 3.1 with Vision",
  description="Upload an image and enter an instruction to generate a description based on the provided instruction."
 )
 # Launch the Gradio app
-interface.launch()

 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from PIL import Image
 import gradio as gr
 # Load the model and tokenizer
 model = AutoModelForCausalLM.from_pretrained(
  "qresearch/llama-3.1-8B-vision-378",
 # Define the function to process the image and instruction
 def describe_image(image, instruction):
+ description = model.answer_question(
+ image, instruction, tokenizer, max_new_tokens=128, do_sample=True, temperature=0.3
+ )
  return description
 # Create the Gradio interface
  gr.Textbox(placeholder="Enter your instruction here...", label="Instruction") # Input for the instruction
  ],
  outputs="text", # Output is text (the description)
+ title="LLaMA 3.1 with vision",
  description="Upload an image and enter an instruction to generate a description based on the provided instruction."
 )
 # Launch the Gradio app
+interface.launch()