zamal committed
Commit fbbadab
1 Parent(s): f07f172

Update app.py

Files changed (1)
  1. app.py +54 -66
app.py CHANGED
@@ -1,72 +1,60 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoProcessor
+from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
 from PIL import Image
-import torch
-import os
-import subprocess
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-
-os.system('pip install -U bitsandbytes')
-
-# Define the repository for the quantized model
-repo_name = "cyan2k/molmo-7B-D-bnb-4bit"
-
-# Load processor and model with GPU optimization
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-processor = AutoProcessor.from_pretrained(repo_name, trust_remote_code=True)
-
-# Load model with 4-bit quantization
-model = AutoModelForCausalLM.from_pretrained(repo_name,
-                                             device_map="auto",
-                                             torch_dtype=torch.float16,
-                                             load_in_4bit=True,
-                                             trust_remote_code=True)
-model.to(device)
-
-def process_image_and_text(image, text):
-    # Convert numpy image to PIL format
-    pil_image = Image.fromarray(image)
-
-    # Process image and text with processor
-    inputs = processor(images=[pil_image], text=text, return_tensors="pt").to(device)
-
-    # Generate output using the model
-    output = model.generate(**inputs, max_new_tokens=200)
-
-    # Decode the generated output
-    generated_text = processor.decode(output[0], skip_special_tokens=True)
+
+# Load the model and processor
+repo_name = "cyan2k/molmo-7B-O-bnb-4bit"
+arguments = {
+    "device_map": "auto",
+    "torch_dtype": "auto",
+    "trust_remote_code": True,
+    "load_in_8bit": True  # Use 8-bit loading for a reduced memory footprint
+}
+
+# Load the processor and model
+processor = AutoProcessor.from_pretrained(repo_name, **arguments)
+model = AutoModelForCausalLM.from_pretrained(repo_name, **arguments)
+
+def describe_image(image):
+    # Preprocess the uploaded image together with the fixed prompt
+    inputs = processor.process(
+        images=[image],
+        text="Describe this image in great detail."
+    )
+
+    # Add a batch dimension and move the inputs to the model's device
+    inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}
+
+    # Generate output
+    output = model.generate_from_batch(
+        inputs,
+        GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>"),
+        tokenizer=processor.tokenizer,
+    )
+
+    # Decode only the newly generated tokens, skipping the prompt
+    generated_tokens = output[0, inputs["input_ids"].size(1):]
+    generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
+
     return generated_text
 
-def chatbot(image, text, history):
-    # Check if the image is uploaded
-    if image is None:
-        return history + [("Please upload an image first.", None)]
-
-    # Get response by processing the image and text
-    response = process_image_and_text(image, text)
-
-    # Append question and response to the chat history
-    history.append((text, response))
-    return history
-
-# Define the Gradio interface
-with gr.Blocks() as demo:
-    gr.Markdown("# Image Chatbot with Molmo-7B-4 Bit Quantized")
-
-    with gr.Row():
-        image_input = gr.Image(type="numpy")
-        chatbot_output = gr.Chatbot()
-
-    text_input = gr.Textbox(placeholder="Ask a question about the image...")
-    submit_button = gr.Button("Submit")
-
-    state = gr.State([])
-
-    # Connect the submit button and textbox to the chatbot function
-    submit_button.click(fn=chatbot, inputs=[image_input, text_input, state], outputs=chatbot_output)
-    text_input.submit(fn=chatbot, inputs=[image_input, text_input, state], outputs=chatbot_output)
-
-# Launch the Gradio app with GPU
-demo.launch(share=True)
+def gradio_app():
+    # Define the Gradio interface components
+    image_input = gr.Image(type="pil", label="Upload Image")
+    output_text = gr.Textbox(label="Image Description", interactive=False)
+
+    # Create the Gradio interface
+    interface = gr.Interface(
+        fn=describe_image,
+        inputs=image_input,
+        outputs=output_text,
+        title="Image Description App",
+        description="Upload an image and get a detailed description using the Molmo 7B model"
+    )
+
+    # Launch the interface
+    interface.launch()
+
+# Launch the Gradio app
+gradio_app()
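One thing worth flagging in the new loader: the checkpoint is already a bitsandbytes 4-bit export, so the extra `load_in_8bit=True` flag may be ignored or rejected depending on the transformers version. If an explicit quantization setting is wanted, a minimal sketch using the `BitsAndBytesConfig` API (an assumption about the installed transformers version, not part of this commit) would be:

```python
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Sketch: explicit quantization config instead of the bare load_in_8bit flag.
# Whether this is needed depends on how the pre-quantized checkpoint was saved.
quant_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(
    "cyan2k/molmo-7B-O-bnb-4bit",
    device_map="auto",
    torch_dtype="auto",
    trust_remote_code=True,
    quantization_config=quant_config,
)
```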
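Before wiring the function into the UI, a quick local smoke test of `describe_image` can save a Space restart. A minimal sketch, assuming the module-level `processor`/`model` from app.py have loaded; `example.jpg` is a placeholder for any local test image, not a file in the repo:

```python
from PIL import Image

# Minimal smoke test for describe_image(); run before launching the UI.
# "example.jpg" is a placeholder path, not part of the repository.
if __name__ == "__main__":
    test_image = Image.open("example.jpg").convert("RGB")
    print(describe_image(test_image))
```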
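The commit also drops the free-form question box that the old chatbot layout offered. If that capability is wanted back on top of the new `gr.Interface` design, a sketch along these lines could work; `describe_image_with_prompt` is an illustrative name, not part of this commit, and it assumes the module-level `processor` and `model` from app.py:

```python
import gradio as gr
from transformers import GenerationConfig

# Variant of describe_image() that takes the prompt as a second input,
# mirroring the free-form questions the removed chatbot UI allowed.
def describe_image_with_prompt(image, prompt):
    inputs = processor.process(images=[image], text=prompt)
    inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}
    output = model.generate_from_batch(
        inputs,
        GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>"),
        tokenizer=processor.tokenizer,
    )
    generated_tokens = output[0, inputs["input_ids"].size(1):]
    return processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)

interface = gr.Interface(
    fn=describe_image_with_prompt,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(value="Describe this image in great detail.", label="Prompt"),
    ],
    outputs=gr.Textbox(label="Model Output", interactive=False),
)
```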