Spaces:

lixin4ever
/

VideoLLaMA2

Running on Zero

App Files Files Community

ClownRat committed on Jun 14, 2024

Commit

ee3d0a5

1 Parent(s): fe61520

Update demo.

Browse files

Files changed (1) hide show

app.py +26 -24

app.py CHANGED Viewed

@@ -102,6 +102,7 @@ class Chat:
         # 2. text preprocess (tag process & generate prompt).
         state = self.get_prompt(prompt, state)
         prompt = state.get_prompt()
         input_ids = tokenizer_MMODAL_token(prompt, tokenizer, MMODAL_TOKEN_INDEX[modals[0]], return_tensors='pt')
         input_ids = input_ids.unsqueeze(0).to(self.model.device)
@@ -130,15 +131,13 @@ class Chat:
 @spaces.GPU(duration=120)
-def generate(image, video, first_run, state, state_, textbox_in, temperature, top_p, max_output_tokens, dtype=torch.float16):
-    flag = 1
     if not textbox_in:
         if len(state_.messages) > 0:
             textbox_in = state_.messages[-1][1]
             state_.messages.pop(-1)
-            flag = 0
         else:
-            return "Please enter instruction"
     image = image if image else "none"
     video = video if video else "none"
@@ -187,30 +186,34 @@ def generate(image, video, first_run, state, state_, textbox_in, temperature, to
     if os.path.exists(video):
         show_images += f'<video controls playsinline width="500" style="display: inline-block;"  src="./file={video}"></video>'
-    if flag:
-        state.append_message(state.roles[0], textbox_in + "\n" + show_images)
     state.append_message(state.roles[1], textbox_out)
-    return (gr.update(value=image if os.path.exists(image) else None, interactive=True), gr.update(value=video if os.path.exists(video) else None, interactive=True),
-            state.to_gradio_chatbot(), False, state, state_, gr.update(value=None, interactive=True))
-def regenerate(state, state_, textbox):
     state.messages.pop(-1)
-    state_.messages.pop(-1)
-    textbox = gr.update(value=None, interactive=True)
     if len(state.messages) > 0:
-        return state, state_, textbox, state.to_gradio_chatbot(), False
-    return state, state_, textbox, state.to_gradio_chatbot(), True
 def clear_history(state, state_):
     state = conv_templates[conv_mode].copy()
     state_ = conv_templates[conv_mode].copy()
     return (gr.update(value=None, interactive=True),
-            gr.update(value=None, interactive=True), \
-            state.to_gradio_chatbot(), \
-            True, state, state_, gr.update(value=None, interactive=True))
 # Known bugs of the Zero environment:
 # 1. The environment is fixed to torch==2.0.1+cu117, gradio>=4.x.x
@@ -230,7 +233,6 @@ with gr.Blocks(title='VideoLLaMA 2 🔥🚀🔥', theme=gr.themes.Default(primar
     gr.Markdown(title_markdown)
     state = gr.State()
     state_ = gr.State()
-    first_run = gr.State()
     with gr.Row():
         with gr.Column(scale=3):
@@ -331,20 +333,20 @@ with gr.Blocks(title='VideoLLaMA 2 🔥🚀🔥', theme=gr.themes.Default(primar
     submit_btn.click(
         generate,
-        [image, video, first_run, state, state_, textbox, temperature, top_p, max_output_tokens],
-        [image, video, chatbot, first_run, state, state_, textbox])
     regenerate_btn.click(
         regenerate,
-        [state, state_, textbox],
-        [state, state_, textbox, chatbot, first_run]).then(
         generate,
-        [image, video, first_run, state, state_, textbox, temperature, top_p, max_output_tokens],
-        [image, video, chatbot, first_run, state, state_, textbox])
     clear_btn.click(
         clear_history,
         [state, state_],
-        [image, video, chatbot, first_run, state, state_, textbox])
 demo.launch()

         # 2. text preprocess (tag process & generate prompt).
         state = self.get_prompt(prompt, state)
         prompt = state.get_prompt()
         input_ids = tokenizer_MMODAL_token(prompt, tokenizer, MMODAL_TOKEN_INDEX[modals[0]], return_tensors='pt')
         input_ids = input_ids.unsqueeze(0).to(self.model.device)
 @spaces.GPU(duration=120)
+def generate(image, video, state, state_, textbox_in, temperature, top_p, max_output_tokens, dtype=torch.float16):
     if not textbox_in:
         if len(state_.messages) > 0:
             textbox_in = state_.messages[-1][1]
             state_.messages.pop(-1)
         else:
+            assert "Please enter instruction"
     image = image if image else "none"
     video = video if video else "none"
     if os.path.exists(video):
         show_images += f'<video controls playsinline width="500" style="display: inline-block;"  src="./file={video}"></video>'
+    state.append_message(state.roles[0], textbox_in + "\n" + show_images)
     state.append_message(state.roles[1], textbox_out)
+    # BUG: only single-turn conversation is supported for now.
+    state_.messages.pop(-1)
+    state_.messages.pop(-1)
+    return (gr.update(value=image if os.path.exists(image) else None, interactive=True),
+            gr.update(value=video if os.path.exists(video) else None, interactive=True),
+            state.to_gradio_chatbot(), state, state_)
+def regenerate(state, state_):
+    state.messages.pop(-1)
     state.messages.pop(-1)
     if len(state.messages) > 0:
+        return state.to_gradio_chatbot(), state, state_
+    return state.to_gradio_chatbot(), state, state_
 def clear_history(state, state_):
     state = conv_templates[conv_mode].copy()
     state_ = conv_templates[conv_mode].copy()
     return (gr.update(value=None, interactive=True),
+            gr.update(value=None, interactive=True),
+            state.to_gradio_chatbot(), state, state_,
+            gr.update(value=None, interactive=True))
 # Known bugs of the Zero environment:
 # 1. The environment is fixed to torch==2.0.1+cu117, gradio>=4.x.x
     gr.Markdown(title_markdown)
     state = gr.State()
     state_ = gr.State()
     with gr.Row():
         with gr.Column(scale=3):
     submit_btn.click(
         generate,
+        [image, video, state, state_, textbox, temperature, top_p, max_output_tokens],
+        [image, video, chatbot, state, state_])
     regenerate_btn.click(
         regenerate,
+        [state, state_],
+        [chatbot, state, state_]).then(
         generate,
+        [image, video, state, state_, textbox, temperature, top_p, max_output_tokens],
+        [image, video, chatbot, state, state_])
     clear_btn.click(
         clear_history,
         [state, state_],
+        [image, video, chatbot, state, state_, textbox])
 demo.launch()