Dongxu Li committed
Commit: f7f5be8
Parent(s): 8f68280

fix missing rep_penalty.

Files changed (1): app.py (+31, -36)
app.py CHANGED
@@ -3,7 +3,6 @@ from io import BytesIO
 import string
 import gradio as gr
 import requests
-from PIL import Image
 from utils import Endpoint
 
 
@@ -15,7 +14,10 @@ def encode_image(image):
     return buffered
 
 
-def query_api(image, prompt, decoding_method, temperature, len_penalty, repetition_penalty):
+def query_api(
+    image, prompt, decoding_method, temperature, len_penalty, repetition_penalty
+):
+
     url = endpoint.url
 
     headers = {"User-Agent": "BLIP-2 HuggingFace Space"}
@@ -60,8 +62,11 @@ def inference(
     history.append(text_input)
 
     prompt = " ".join(history)
+    print(prompt)
 
-    output = query_api(image, prompt, decoding_method, temperature, length_penalty, repetition_penalty)
+    output = query_api(
+        image, prompt, decoding_method, temperature, length_penalty, repetition_penalty
+    )
     output = postprocess_output(output)
     history += output
 
@@ -69,37 +74,23 @@ def inference(
         (history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)
     ]  # convert to tuples of list
 
-    return chat, history
-
-
-# image source: https://m.facebook.com/112483753737319/photos/112489593736735/
-endpoint = Endpoint()
-
-examples = [
-    ["house.png", "How could someone get out of the house?"],
-    [
-        "sunset.png",
-        "Write a romantic message that goes along this photo.",
-    ],
-]
+    return {chatbot: chat, state: history}
 
-# outputs = ["chatbot", "state"]
 
 title = """<h1 align="center">BLIP-2</h1>"""
 description = """Gradio demo for BLIP-2, a multimodal chatbot from Salesforce Research. To use it, simply upload your image, or click one of the examples to load them. Please visit our <a href='https://github.com/salesforce/LAVIS/tree/main/projects/blip2' target='_blank'>project webpage</a>.</p>
 <p> <strong>Disclaimer</strong>: This is a research prototype and is not intended for production use. No data including but not restricted to text and images is collected. </p>"""
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2201.12086' target='_blank'>BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models</a>"
 
-# iface = gr.Interface(inference, inputs, outputs, title=title, description=description, article=article, examples=examples)
-
-
-def reset_all(text_input, image_input, chatbot, history):
-    return "", None, None, []
-
-
-def reset_chatbot(chatbot, history):
-    return None, []
-
+endpoint = Endpoint()
+
+examples = [
+    ["house.png", "How could someone get out of the house?"],
+    # [
+    #     "sunset.png",
+    #     "Write a romantic message that goes along this photo.",
+    # ],
+]
 
 with gr.Blocks() as iface:
     state = gr.State([])
@@ -139,25 +130,30 @@ with gr.Blocks() as iface:
 
             rep_penalty = gr.Slider(
                 minimum=1.0,
-                maximum=10.0,
-                value=1.0,
+                maximum=20.0,
+                value=10.0,
                 step=0.5,
                 interactive=True,
                 label="Repetition Penalty",
             )
 
         with gr.Column():
-            chatbot = gr.Chatbot()
+            with gr.Row():
+                chatbot = gr.Chatbot()
+                image_input.change(lambda: (None, []), [], [chatbot, state])
 
             with gr.Row():
+
                 clear_button = gr.Button(value="Clear", interactive=True)
                 clear_button.click(
-                    reset_all,
-                    [text_input, image_input, chatbot, state],
+                    lambda: ("", None, [], []),
+                    [],
                     [text_input, image_input, chatbot, state],
                 )
 
-                submit_button = gr.Button(value="Submit", interactive=True, variant="primary")
+                submit_button = gr.Button(
+                    value="Submit", interactive=True, variant="primary"
+                )
                 submit_button.click(
                     inference,
                     [
@@ -166,17 +162,16 @@ with gr.Blocks() as iface:
                         sampling,
                         temperature,
                         len_penalty,
+                        rep_penalty,
                         state,
                     ],
                     [chatbot, state],
                )
 
-    image_input.change(reset_chatbot, [chatbot, state], [chatbot, state])
-
     examples = gr.Examples(
         examples=examples,
         inputs=[image_input, text_input],
     )
 
-iface.queue(concurrency_count=1)
-iface.launch(enable_queue=True, debug=True)
+iface.queue(concurrency_count=1, api_open=False, max_size=20)
+iface.launch(enable_queue=True)
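
Note on the bug class this commit fixes: in Gradio Blocks, an event handler receives one positional value per component listed in the event's `inputs`, so a slider that is defined in the UI but omitted from that list never reaches the handler. Here, `rep_penalty` was missing from `submit_button.click(...)`'s input list, so its value never arrived at `inference`. A minimal, self-contained sketch of the corrected wiring (hypothetical names, not the Space's code):

import gradio as gr

def respond(text, rep_penalty):
    # One positional argument arrives per component in `inputs` below.
    return f"{text} (repetition_penalty={rep_penalty})"

with gr.Blocks() as demo:
    text = gr.Textbox(label="Prompt")
    rep = gr.Slider(minimum=1.0, maximum=20.0, value=10.0, step=0.5,
                    label="Repetition Penalty")
    out = gr.Textbox(label="Output")
    btn = gr.Button("Submit")
    # Omitting `rep` from this list is the bug: the slider still renders,
    # but its value is never passed to the handler.
    btn.click(respond, [text, rep], [out])

demo.launch()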