zamal committed
Commit fbbadab
1 Parent(s): f07f172

Update app.py

Files changed (1)
  1. app.py +54 -66
app.py CHANGED
@@ -1,72 +1,60 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoProcessor
+from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
 from PIL import Image
-import torch
-import os
-import subprocess
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-
-os.system('pip install -U bitsandbytes')
-
-# Define the repository for the quantized model
-repo_name = "cyan2k/molmo-7B-D-bnb-4bit"
-
-# Load processor and model with GPU optimization
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-processor = AutoProcessor.from_pretrained(repo_name, trust_remote_code=True)
-
-# Load model with 4-bit quantization
-model = AutoModelForCausalLM.from_pretrained(repo_name,
-                                             device_map="auto",
-                                             torch_dtype=torch.float16,
-                                             load_in_4bit=True,
-                                             trust_remote_code=True)
-model.to(device)
-
-def process_image_and_text(image, text):
-    # Convert numpy image to PIL format
-    pil_image = Image.fromarray(image)
-
-    # Process image and text with processor
-    inputs = processor(images=[pil_image], text=text, return_tensors="pt").to(device)
-
-    # Generate output using the model
-    output = model.generate(**inputs, max_new_tokens=200)
-
-    # Decode the generated output
-    generated_text = processor.decode(output[0], skip_special_tokens=True)
+
+# Load the model and processor
+repo_name = "cyan2k/molmo-7B-O-bnb-4bit"
+arguments = {
+    "device_map": "auto",
+    "torch_dtype": "auto",
+    "trust_remote_code": True,
+    "load_in_8bit": True  # Use 8-bit loading for a reduced memory footprint
+}
+
+# Load the processor and model
+processor = AutoProcessor.from_pretrained(repo_name, **arguments)
+model = AutoModelForCausalLM.from_pretrained(repo_name, **arguments)
+
+def describe_image(image):
+    # Preprocess the uploaded image together with the fixed prompt
+    inputs = processor.process(
+        images=[image],
+        text="Describe this image in great detail."
+    )
+
+    # Add a batch dimension and move the inputs to the model's device
+    inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}
+
+    # Generate output
+    output = model.generate_from_batch(
+        inputs,
+        GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>"),
+        tokenizer=processor.tokenizer,
+    )
+
+    # Decode only the newly generated tokens, skipping the prompt
+    generated_tokens = output[0, inputs["input_ids"].size(1):]
+    generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
+
     return generated_text
 
-def chatbot(image, text, history):
-    # Check if the image is uploaded
-    if image is None:
-        return history + [("Please upload an image first.", None)]
-
-    # Get response by processing the image and text
-    response = process_image_and_text(image, text)
-
-    # Append question and response to the chat history
-    history.append((text, response))
-    return history
-
-# Define the Gradio interface
-with gr.Blocks() as demo:
-    gr.Markdown("# Image Chatbot with Molmo-7B-4 Bit Quantized")
-
-    with gr.Row():
-        image_input = gr.Image(type="numpy")
-        chatbot_output = gr.Chatbot()
-
-    text_input = gr.Textbox(placeholder="Ask a question about the image...")
-    submit_button = gr.Button("Submit")
-
-    state = gr.State([])
-
-    # Connect the submit button and textbox to the chatbot function
-    submit_button.click(fn=chatbot, inputs=[image_input, text_input, state], outputs=chatbot_output)
-    text_input.submit(fn=chatbot, inputs=[image_input, text_input, state], outputs=chatbot_output)
-
-# Launch the Gradio app with GPU
-demo.launch(share=True)
+def gradio_app():
+    # Define the Gradio interface components
+    image_input = gr.Image(type="pil", label="Upload Image")
+    output_text = gr.Textbox(label="Image Description", interactive=False)
+
+    # Create the Gradio interface
+    interface = gr.Interface(
+        fn=describe_image,
+        inputs=image_input,
+        outputs=output_text,
+        title="Image Description App",
+        description="Upload an image and get a detailed description using the Molmo 7B model"
+    )
+
+    # Launch the interface
+    interface.launch()
+
+# Launch the Gradio app
+gradio_app()
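One thing worth flagging in the new loader: the checkpoint is already a bitsandbytes 4-bit export, so the extra `load_in_8bit=True` flag may be ignored or rejected depending on the transformers version. If an explicit quantization setting is wanted, a minimal sketch using the `BitsAndBytesConfig` API (an assumption about the installed transformers version, not part of this commit) would be:

```python
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Sketch: explicit quantization config instead of the bare load_in_8bit flag.
# Whether this is needed depends on how the pre-quantized checkpoint was saved.
quant_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(
    "cyan2k/molmo-7B-O-bnb-4bit",
    device_map="auto",
    torch_dtype="auto",
    trust_remote_code=True,
    quantization_config=quant_config,
)
```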
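Before wiring the function into the UI, a quick local smoke test of `describe_image` can save a Space restart. A minimal sketch, assuming the module-level `processor`/`model` from app.py have loaded; `example.jpg` is a placeholder for any local test image, not a file in the repo:

```python
from PIL import Image

# Minimal smoke test for describe_image(); run before launching the UI.
# "example.jpg" is a placeholder path, not part of the repository.
if __name__ == "__main__":
    test_image = Image.open("example.jpg").convert("RGB")
    print(describe_image(test_image))
```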
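The commit also drops the free-form question box that the old chatbot layout offered. If that capability is wanted back on top of the new `gr.Interface` design, a sketch along these lines could work; `describe_image_with_prompt` is an illustrative name, not part of this commit, and it assumes the module-level `processor` and `model` from app.py:

```python
import gradio as gr
from transformers import GenerationConfig

# Variant of describe_image() that takes the prompt as a second input,
# mirroring the free-form questions the removed chatbot UI allowed.
def describe_image_with_prompt(image, prompt):
    inputs = processor.process(images=[image], text=prompt)
    inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}
    output = model.generate_from_batch(
        inputs,
        GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>"),
        tokenizer=processor.tokenizer,
    )
    generated_tokens = output[0, inputs["input_ids"].size(1):]
    return processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)

interface = gr.Interface(
    fn=describe_image_with_prompt,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(value="Describe this image in great detail.", label="Prompt"),
    ],
    outputs=gr.Textbox(label="Model Output", interactive=False),
)
```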