MiyamizuMitsuha
committed on
Commit
·
ce57d08
1
Parent(s):
479d45f
Update app
Browse files- app.py +87 -34
- requirements.txt +6 -1
app.py
CHANGED
@@ -99,8 +99,6 @@ def safe_cuda(self, *args, **kwargs):
|
|
99 |
torch.Tensor.cuda = safe_cuda
|
100 |
|
101 |
|
102 |
-
|
103 |
-
|
104 |
model_name = "YuukiAsuna/Vintern-1B-v2-ViTable-docvqa"
|
105 |
|
106 |
|
@@ -116,42 +114,97 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, us
|
|
116 |
|
117 |
|
118 |
|
119 |
-
|
120 |
-
def
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
print(f'User: {question}\nAssistant: {response}')
|
129 |
-
print("="*30)
|
130 |
-
|
131 |
-
|
132 |
-
# Update the chat history
|
133 |
-
chat_history.append((image, None))
|
134 |
-
chat_history.append((question, None))
|
135 |
-
chat_history.append((None, response))
|
136 |
-
|
137 |
-
return chat_history
|
138 |
-
|
139 |
-
|
140 |
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
)
|
|
|
152 |
|
153 |
|
154 |
-
# Launch the chatbot
|
155 |
-
interface.launch()
|
156 |
|
157 |
|
|
|
99 |
torch.Tensor.cuda = safe_cuda
|
100 |
|
101 |
|
|
|
|
|
102 |
model_name = "YuukiAsuna/Vintern-1B-v2-ViTable-docvqa"
|
103 |
|
104 |
|
|
|
114 |
|
115 |
|
116 |
|
117 |
+
@spaces.GPU
def chat(message, history):
    """Answer one turn of the multimodal chat with the Vintern model.

    Args:
        message: Mapping supplied by the multimodal chat UI. This function
            reads ``message["text"]`` (the user's question) and
            ``message["files"]`` (attachments; the first one is read through
            its ``"path"`` key).
        history: Earlier turns as ``[user, assistant]`` pairs. Pairs whose
            assistant part is ``None`` are skipped when rebuilding the
            conversation. NOTE(review): file uploads are assumed to appear as
            pairs whose user part is a tuple/list of file paths -- confirm
            against the installed gradio version.

    Returns:
        The response produced by ``model.chat``.

    Raises:
        gr.Error: If neither the current message nor the history contains an
            image to run the model on.
    """
    print(history)
    print(message)

    files = message["files"]
    if files:
        # A freshly attached image always takes precedence.
        test_image = files[0]["path"]
    else:
        # Fall back to the first image uploaded earlier in the conversation.
        test_image = next(
            (
                pair[0][0]
                for pair in history
                if isinstance(pair[0], (tuple, list)) and pair[0]
            ),
            None,
        )
        if test_image is None:
            # Previously this path crashed with IndexError on an empty
            # ``files`` list; surface a readable message to the user instead.
            raise gr.Error("Please upload an image before asking a question.")

    pixel_values = load_image(test_image, max_num=12).to(torch.bfloat16).cuda()
    generation_config = dict(
        max_new_tokens=1024,
        do_sample=True,
        num_beams=3,
        repetition_penalty=2.5,
    )

    # Rebuild the text-only conversation without mutating ``history``: the
    # caller owns that object, and its pairs may be immutable tuples.
    conv_history = []
    image_tagged = False
    for pair in history:
        user_msg, bot_msg = pair[0], pair[1]
        if bot_msg is None:
            continue
        if not conv_history and not files:
            # The image came from an earlier turn, so its placeholder goes on
            # the first completed user turn.
            user_msg = '<image>\n' + user_msg
            image_tagged = True
        conv_history.append((user_msg, bot_msg))
    if history:
        print(conv_history)

    # Exactly one '<image>' placeholder must accompany ``pixel_values``: put
    # it on the current question unless a past turn already carries it.
    if image_tagged:
        question = message["text"]
    else:
        question = '<image>\n' + message["text"]

    response, _ = model.chat(
        tokenizer,
        pixel_values,
        question,
        generation_config,
        # The first turn keeps passing ``None``, as the original code did.
        history=conv_history if history else None,
        return_history=True,
    )

    print(f'User: {question}\nAssistant: {response}')

    return response
|
151 |
+
|
152 |
+
# Custom stylesheet handed to the chat interface. The leading block whose
# lines start with "# " is the author's disabled mobile variant; it is kept
# verbatim because the string is passed to the UI as-is.
CSS = """
# @media only screen and (max-width: 600px){
# #component-3 {
# height: 90dvh !important;
# transform-origin: top; /* Đảm bảo rằng phần tử mở rộng từ trên xuống */
# border-style: solid;
# overflow: hidden;
# flex-grow: 1;
# min-width: min(160px, 100%);
# border-width: var(--block-border-width);
# }
# }
#component-3 {
height: 50dvh !important;
transform-origin: top; /* Đảm bảo rằng phần tử mở rộng từ trên xuống */
border-style: solid;
overflow: hidden;
flex-grow: 1;
min-width: min(160px, 100%);
border-width: var(--block-border-width);
}
/* Đảm bảo ảnh bên trong nút hiển thị đúng cách cho các nút có aria-label chỉ định */
button.svelte-1lcyrx4[aria-label="user's message: a file of type image/jpeg, "] img.svelte-1pijsyv {
width: 100%;
object-fit: contain;
height: 100%;
border-radius: 13px; /* Thêm bo góc cho ảnh */
max-width: 50vw; /* Giới hạn chiều rộng ảnh */
}
/* Đặt chiều cao cho nút và cho phép chọn văn bản chỉ cho các nút có aria-label chỉ định */
button.svelte-1lcyrx4[aria-label="user's message: a file of type image/jpeg, "] {
user-select: text;
text-align: left;
height: 300px;
}
/* Thêm bo góc và giới hạn chiều rộng cho ảnh không thuộc avatar container */
.message-wrap.svelte-1lcyrx4 > div.svelte-1lcyrx4 .svelte-1lcyrx4:not(.avatar-container) img {
border-radius: 13px;
max-width: 50vw;
}
.message-wrap.svelte-1lcyrx4 .message.svelte-1lcyrx4 img {
margin: var(--size-2);
max-height: 500px;
}
"""


# Build the multimodal chat UI around ``chat`` and serve it with queuing on.
demo = gr.ChatInterface(
    chat,
    multimodal=True,
    title="Vintern-1B-v2-ViTable-docvqa",
    description="""Try [Vintern-1B-v2-ViTable-docvqa](https://huggingface.co/YuukiAsuna/Vintern-1B-v2-ViTable-docvqa) in this demo. Vintern-1B-v2-ViTable-docvqa is a finetuned version of [Vintern-1B-v2](https://huggingface.co/5CD-AI/Vintern-1B-v2)""",
    css=CSS,
)
demo.queue().launch()
|
207 |
|
208 |
|
|
|
|
|
209 |
|
210 |
|
requirements.txt
CHANGED
@@ -12,4 +12,9 @@ accelerate
|
|
12 |
bitsandbytes
|
13 |
peft
|
14 |
tensorboardX
|
15 |
-
flash_attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
|
|
|
|
|
|
|
|
|
|
|
|
12 |
bitsandbytes
|
13 |
peft
|
14 |
tensorboardX
|
15 |
+
flash_attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
|
16 |
+
spaces
|
17 |
+
pypandoc
|
18 |
+
fastapi
|
19 |
+
wheel
|
20 |
+
imageio
|