Spaces: Running on Zero
Update chatbot.py
Browse files
- chatbot.py +17 -32
chatbot.py
CHANGED
@@ -198,6 +198,8 @@ client_mixtral = InferenceClient("NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO")
|
|
198 |
client_mistral = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
199 |
generate_kwargs = dict( max_new_tokens=4000, do_sample=True, stream=True, details=True, return_full_text=False )
|
200 |
|
|
|
|
|
201 |
@spaces.GPU(duration=60, queue=False)
|
202 |
def model_inference( user_prompt, chat_history, web_search):
|
203 |
if not user_prompt["files"]:
|
@@ -242,45 +244,28 @@ def model_inference( user_prompt, chat_history, web_search):
|
|
242 |
output += response.token.text
|
243 |
yield output
|
244 |
else:
|
245 |
-
|
246 |
-
if len(message["files"]) == 1:
|
247 |
-
image = [message["files"][0].path]
|
248 |
-
elif len(message["files"]) > 1:
|
249 |
-
image = [msg.path for msg in message["files"]]
|
250 |
|
251 |
-
txt =
|
|
|
|
|
252 |
|
253 |
video_extensions = ("avi", "mp4", "mov", "mkv", "flv", "wmv", "mjpeg", "wav", "gif", "webm", "m4v", "3gp")
|
254 |
image_extensions = Image.registered_extensions()
|
255 |
image_extensions = tuple([ex for ex, f in image_extensions.items()])
|
256 |
-
|
257 |
-
if len(image) == 1:
|
258 |
-
if image.endswith(video_extensions):
|
259 |
-
image = sample_frames(image)
|
260 |
-
print(len(image))
|
261 |
-
image_tokens = "<image>" * int(len(image))
|
262 |
-
prompt = f"<|im_start|>user {image_tokens}\n{user_prompt}<|im_end|><|im_start|>assistant"
|
263 |
-
elif image.endswith(image_extensions):
|
264 |
-
image = Image.open(image).convert("RGB")
|
265 |
-
prompt = f"<|im_start|>user <image>\n{user_prompt}<|im_end|><|im_start|>assistant"
|
266 |
-
|
267 |
-
elif len(image) > 1:
|
268 |
-
image_list = []
|
269 |
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
274 |
|
275 |
-
elif img.endswith(video_extensions):
|
276 |
-
frames = sample_frames(img)
|
277 |
-
for frame in frames:
|
278 |
-
image_list.append(frame)
|
279 |
-
|
280 |
-
toks = "<image>" * len(image_list)
|
281 |
-
prompt = f"<|im_start|>user {toks}\n{user_prompt}<|im_end|><|im_start|>assistant"
|
282 |
-
image = image_list
|
283 |
-
|
284 |
inputs = processor(prompt, image, return_tensors="pt").to("cuda", torch.float16)
|
285 |
streamer = TextIteratorStreamer(processor, skip_prompt=True, **{"skip_special_tokens": True})
|
286 |
generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
|
|
|
198 |
client_mistral = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
199 |
generate_kwargs = dict( max_new_tokens=4000, do_sample=True, stream=True, details=True, return_full_text=False )
|
200 |
|
201 |
+
system_llava = "<|im_start|>system\nYou are OpenGPT 4o, an exceptionally capable and versatile AI assistant made by KingNish. Your task is to fulfill users query in best possible way. You are provided with image, videos and 3d structures as input with question your task is to give best possible detailed results to user according to their query. Reply the question asked by user properly and best possible way.<|im_end|>"
|
202 |
+
|
203 |
@spaces.GPU(duration=60, queue=False)
|
204 |
def model_inference( user_prompt, chat_history, web_search):
|
205 |
if not user_prompt["files"]:
|
|
|
244 |
output += response.token.text
|
245 |
yield output
|
246 |
else:
|
247 |
+
image = user_prompt["files"][-1]
|
|
|
|
|
|
|
|
|
248 |
|
249 |
+
txt = user_prompt["text"]
|
250 |
+
img = user_prompt["files"]
|
251 |
+
ext_buffer =f"'user\ntext': '{txt}', 'files': '{img}' assistant"
|
252 |
|
253 |
video_extensions = ("avi", "mp4", "mov", "mkv", "flv", "wmv", "mjpeg", "wav", "gif", "webm", "m4v", "3gp")
|
254 |
image_extensions = Image.registered_extensions()
|
255 |
image_extensions = tuple([ex for ex, f in image_extensions.items()])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
256 |
|
257 |
+
if image.endswith(video_extensions):
|
258 |
+
image = sample_frames(image)
|
259 |
+
print(len(image))
|
260 |
+
image_tokens = "<image>" * int(len(image))
|
261 |
+
prompt = f"<|im_start|>user {image_tokens}\n{user_prompt}<|im_end|><|im_start|>assistant"
|
262 |
+
|
263 |
+
elif image.endswith(image_extensions):
|
264 |
+
image = Image.open(image).convert("RGB")
|
265 |
+
prompt = f"<|im_start|>user <image>\n{user_prompt}<|im_end|><|im_start|>assistant"
|
266 |
+
|
267 |
+
final_prompt = f"{system_llava}\n{prompt}"
|
268 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
269 |
inputs = processor(prompt, image, return_tensors="pt").to("cuda", torch.float16)
|
270 |
streamer = TextIteratorStreamer(processor, skip_prompt=True, **{"skip_special_tokens": True})
|
271 |
generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
|