MiyamizuMitsuha committed on
Commit
ce57d08
·
1 Parent(s): 479d45f

Update app

Browse files
Files changed (2) hide show
  1. app.py +87 -34
  2. requirements.txt +6 -1
app.py CHANGED
@@ -99,8 +99,6 @@ def safe_cuda(self, *args, **kwargs):
99
  torch.Tensor.cuda = safe_cuda
100
 
101
 
102
-
103
-
104
  model_name = "YuukiAsuna/Vintern-1B-v2-ViTable-docvqa"
105
 
106
 
@@ -116,42 +114,97 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, us
116
 
117
 
118
 
119
-
120
- def Vintern_1B_v2_ViTable_docvqa(image, question, chat_history=[]):
121
- pixel_values = load_image(image, max_num=12).to(torch.bfloat16).cuda()
122
-
123
- generation_config = dict(max_new_tokens= 1024, do_sample=False, num_beams = 3, repetition_penalty=2.0)
124
-
125
- # question = input("Question: ")
126
- question = '<image>\n' + question
127
- response = model.chat(tokenizer, pixel_values, question, generation_config)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  print(f'User: {question}\nAssistant: {response}')
129
- print("="*30)
130
-
131
-
132
- # Update the chat history
133
- chat_history.append((image, None))
134
- chat_history.append((question, None))
135
- chat_history.append((None, response))
136
-
137
- return chat_history
138
-
139
-
140
 
141
- interface = gr.Interface(
142
- fn=Vintern_1B_v2_ViTable_docvqa,
143
- inputs=[
144
- gr.Image(label="Upload Image", type="filepath"), # Image input
145
- gr.Textbox(label="Enter your question"), # Text input
146
- ],
147
- outputs=gr.Chatbot(label="Chat History"), # Chatbot-style output
148
- title="Vintern-1B-v2-ViTable-docvqa,",
149
- # description="A chatbot that accepts both images and text, displays images, and provides conversational responses.",
150
- allow_flagging="never",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  )
 
152
 
153
 
154
- # Launch the chatbot
155
- interface.launch()
156
 
157
 
 
99
  torch.Tensor.cuda = safe_cuda
100
 
101
 
 
 
102
  model_name = "YuukiAsuna/Vintern-1B-v2-ViTable-docvqa"
103
 
104
 
 
114
 
115
 
116
 
117
def _uploaded_file_path(file_entry):
    """Return the filesystem path of one entry of ``message["files"]``.

    Dict entries are read through their ``"path"`` key (the shape the original
    code relied on); any other value is returned unchanged and is assumed to
    already be a path -- TODO confirm against the installed Gradio version.
    """
    if isinstance(file_entry, dict):
        return file_entry["path"]
    return file_entry


def _latest_image_path(history):
    """Return the most recently uploaded image path found in *history*, or None.

    Assumes uploads are stored as turns whose user part is a tuple/list with
    the path as its first element (the layout ``history[0][0][0]`` depended
    on). Scanning from the end makes follow-up questions refer to the newest
    image instead of always falling back to the very first one.
    """
    for turn in reversed(history):
        user_part = turn[0]
        if isinstance(user_part, (tuple, list)) and user_part:
            return user_part[0]
    return None


@spaces.GPU
def chat(message, history):
    """Answer one multimodal chat turn with the Vintern model.

    Args:
        message: Mapping with ``"text"`` (the user's question) and ``"files"``
            (uploads attached to this turn; may be empty).
        history: Previous turns as ``[user_part, assistant_part]`` pairs. Pairs
            whose assistant part is ``None`` are skipped when rebuilding the
            model's conversation history.

    Returns:
        The model's response for this turn.

    Raises:
        gr.Error: If neither this message nor the history contains an image.
    """
    print(history)
    print(message)

    uploaded = message["files"]
    if uploaded:
        image_path = _uploaded_file_path(uploaded[0])
    else:
        image_path = _latest_image_path(history)
    if image_path is None:
        # Previously this case crashed with IndexError on message["files"][0].
        raise gr.Error("Please upload an image before asking a question.")

    pixel_values = load_image(image_path, max_num=12).to(torch.bfloat16).cuda()
    generation_config = dict(
        max_new_tokens=1024,
        do_sample=True,
        num_beams=3,
        repetition_penalty=2.5,
    )

    # Rebuild the (question, answer) history expected by model.chat without
    # mutating the history object owned by the UI.
    conv_history = []
    for turn in history:
        user_text, reply = turn[0], turn[1]
        if reply is None:
            continue
        if not conv_history and not uploaded:
            # No new image this turn: the image placeholder belongs on the
            # first answered question of the conversation.
            user_text = '<image>\n' + user_text
        conv_history.append((user_text, reply))
    if history:
        print(conv_history)

    # The placeholder goes on the current question when a new image arrives,
    # or when no earlier answered turn could carry it.
    if uploaded or not conv_history:
        question = '<image>\n' + message["text"]
    else:
        question = message["text"]

    response, conv_history = model.chat(
        tokenizer,
        pixel_values,
        question,
        generation_config,
        history=conv_history if history else None,
        return_history=True,
    )

    print(f'User: {question}\nAssistant: {response}')

    return response
151
+
152
# Custom stylesheet passed to the Gradio chat UI.
# - Fixes the height of #component-3 and styles its border/overflow.
# - Sizes and rounds image previews inside user-message buttons (matched by
#   their aria-label) and lets their text be selected.
# - Rounds and width-limits images that are not inside an avatar container.
# The mobile-only override is kept for reference but is now wrapped in a real
# CSS comment: "#" does not start a comment in CSS, so the previous "# ..."
# lines were sent to the browser as a malformed rule. Its inner /* */ comment
# was dropped because CSS comments do not nest.
CSS = """
/* Disabled mobile override:
@media only screen and (max-width: 600px){
#component-3 {
    height: 90dvh !important;
    transform-origin: top;
    border-style: solid;
    overflow: hidden;
    flex-grow: 1;
    min-width: min(160px, 100%);
    border-width: var(--block-border-width);
}
}
*/
#component-3 {
    height: 50dvh !important;
    transform-origin: top; /* Đảm bảo rằng phần tử mở rộng từ trên xuống */
    border-style: solid;
    overflow: hidden;
    flex-grow: 1;
    min-width: min(160px, 100%);
    border-width: var(--block-border-width);
}
/* Đảm bảo ảnh bên trong nút hiển thị đúng cách cho các nút có aria-label chỉ định */
button.svelte-1lcyrx4[aria-label="user's message: a file of type image/jpeg, "] img.svelte-1pijsyv {
    width: 100%;
    object-fit: contain;
    height: 100%;
    border-radius: 13px; /* Thêm bo góc cho ảnh */
    max-width: 50vw; /* Giới hạn chiều rộng ảnh */
}
/* Đặt chiều cao cho nút và cho phép chọn văn bản chỉ cho các nút có aria-label chỉ định */
button.svelte-1lcyrx4[aria-label="user's message: a file of type image/jpeg, "] {
    user-select: text;
    text-align: left;
    height: 300px;
}
/* Thêm bo góc và giới hạn chiều rộng cho ảnh không thuộc avatar container */
.message-wrap.svelte-1lcyrx4 > div.svelte-1lcyrx4 .svelte-1lcyrx4:not(.avatar-container) img {
    border-radius: 13px;
    max-width: 50vw;
}
.message-wrap.svelte-1lcyrx4 .message.svelte-1lcyrx4 img {
    margin: var(--size-2);
    max-height: 500px;
}
"""
197
+
198
+
199
# Build the multimodal chat UI around `chat`, styled with `CSS`, then enable
# the request queue and start the app.
demo = gr.ChatInterface(
    chat,
    multimodal=True,
    title="Vintern-1B-v2-ViTable-docvqa",
    description=(
        "Try [Vintern-1B-v2-ViTable-docvqa]"
        "(https://huggingface.co/YuukiAsuna/Vintern-1B-v2-ViTable-docvqa)"
        " in this demo. Vintern-1B-v2-ViTable-docvqa is a finetuned version of"
        " [Vintern-1B-v2](https://huggingface.co/5CD-AI/Vintern-1B-v2)"
    ),
    css=CSS,
)
queued_demo = demo.queue()
queued_demo.launch()
207
 
208
 
 
 
209
 
210
 
requirements.txt CHANGED
@@ -12,4 +12,9 @@ accelerate
12
  bitsandbytes
13
  peft
14
  tensorboardX
15
- flash_attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
 
 
 
 
 
 
12
  bitsandbytes
13
  peft
14
  tensorboardX
15
+ flash_attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
16
+ spaces
17
+ pypandoc
18
+ fastapi
19
+ wheel
20
+ imageio