cwkuo committed
Commit ef2dc13 · 1 Parent(s): d8c6a57

disable beam search as it may cause OoM
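Beam search keeps num_beams candidate sequences alive per input, each with its own decoder KV cache, so peak GPU memory grows roughly linearly with the beam count; single-beam streaming avoids that multiplier. A back-of-the-envelope sketch of the effect (all model dimensions below are assumed, generic 7B-class values, not taken from this repo's config):

# Rough KV-cache sizing: why multi-beam decoding can OoM where
# single-beam decoding fits. Dimensions are assumed illustrative
# values, not this app's actual model config.
def kv_cache_bytes(beams, layers=32, heads=32, head_dim=128,
                   seq_len=1024, bytes_per_elem=2):
    # 2x for keys and values; one cache per live hypothesis.
    return 2 * beams * layers * heads * head_dim * seq_len * bytes_per_elem

print(f"num_beams=1: {kv_cache_bytes(1) / 2**30:.1f} GiB")  # 0.5 GiB
print(f"num_beams=5: {kv_cache_bytes(5) / 2**30:.1f} GiB")  # 2.5 GiB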

Files changed (1): app.py (+14 -27)
app.py CHANGED
@@ -159,7 +159,7 @@ def retrieve_knowledge(image):
 
 
 @torch.inference_mode()
-def generate(state: Conversation, temperature, top_p, max_new_tokens, add_knwl, do_sampling, do_beam_search):
+def generate(state: Conversation, temperature, top_p, max_new_tokens, add_knwl, do_sampling):
     if state.skip_next: # This generate call is skipped due to invalid inputs
         yield (state, state.to_gradio_chatbot()) + (no_change_btn,) * 3 + knwl_unchange
         return
@@ -210,36 +210,24 @@ def generate(state: Conversation, temperature, top_p, max_new_tokens, add_knwl,
     prompt = prompt.split("USER:")[-1].replace("ASSISTANT:", "")
     image_pt = gptk_trans(image).to(device).unsqueeze(0)
     samples = {"image": image_pt, "knowledge": knwl_embd, "prompt": prompt}
-    if bool(do_beam_search):
-        new_text = gptk_model.generate(
+    streamer = TextIteratorStreamer(
+        gptk_model.llm_tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15
+    )
+    thread = Thread(
+        target=gptk_model.generate,
+        kwargs=dict(
             samples=samples,
             use_nucleus_sampling=bool(do_sampling),
             max_length=min(int(max_new_tokens), 1024),
             top_p=float(top_p),
             temperature=float(temperature),
+            streamer=streamer,
+            num_beams=1,
             length_penalty=0.0,
             auto_cast=True
-        )[0]
-        streamer = [new_text, ]
-    else:
-        streamer = TextIteratorStreamer(
-            gptk_model.llm_tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15
         )
-        thread = Thread(
-            target=gptk_model.generate,
-            kwargs=dict(
-                samples=samples,
-                use_nucleus_sampling=bool(do_sampling),
-                max_length=min(int(max_new_tokens), 1024),
-                top_p=float(top_p),
-                temperature=float(temperature),
-                streamer=streamer,
-                num_beams=1,
-                length_penalty=0.0,
-                auto_cast=True
-            )
-        )
-        thread.start()
+    )
+    thread.start()
 
     generated_text = ""
     for new_text in streamer:
@@ -301,7 +289,6 @@ def build_demo():
     with gr.Row():
         add_knwl = gr.Checkbox(value=True, interactive=True, label="Knowledge")
         do_sampling = gr.Checkbox(value=False, interactive=True, label="Sampling")
-        do_beam_search = gr.Checkbox(value=False, interactive=True, label="Beam search")
         temperature = gr.Slider(minimum=0.1, maximum=2.0, value=1.0, step=0.1, interactive=True, label="Temperature",)
         top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.1, interactive=True, label="Top P",)
         max_output_tokens = gr.Slider(minimum=0, maximum=1024, value=512, step=64, interactive=True, label="Max output tokens",)
@@ -331,7 +318,7 @@ def build_demo():
         regenerate, [state], [state, chatbot, textbox, imagebox] + btn_list
     ).then(
         generate,
-        [state, temperature, top_p, max_output_tokens, add_knwl, do_sampling, do_beam_search],
+        [state, temperature, top_p, max_output_tokens, add_knwl, do_sampling],
         [state, chatbot] + btn_list + knwl_vis
     )
 
@@ -343,7 +330,7 @@ def build_demo():
         add_text, [state, textbox, imagebox], [state, chatbot, textbox, imagebox] + btn_list
     ).then(
         generate,
-        [state, temperature, top_p, max_output_tokens, add_knwl, do_sampling, do_beam_search],
+        [state, temperature, top_p, max_output_tokens, add_knwl, do_sampling],
         [state, chatbot] + btn_list + knwl_vis
     )
 
@@ -351,7 +338,7 @@ def build_demo():
         add_text, [state, textbox, imagebox], [state, chatbot, textbox, imagebox] + btn_list
     ).then(
         generate,
-        [state, temperature, top_p, max_output_tokens, add_knwl, do_sampling, do_beam_search],
+        [state, temperature, top_p, max_output_tokens, add_knwl, do_sampling],
         [state, chatbot] + btn_list + knwl_vis
     )
 
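For reference, the replacement follows the standard transformers streaming recipe: generate runs in a worker thread while decoded text is consumed incrementally from a TextIteratorStreamer. transformers' streamers do not support beam search, which is why the streaming path pins num_beams=1. A minimal self-contained sketch of the same pattern (gpt2 stands in for this app's gptk_model and its tokenizer; the prompt and sampling values are illustrative):

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# gpt2 is a stand-in for the app's underlying LLM; any causal LM works here.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("The image shows", return_tensors="pt")
streamer = TextIteratorStreamer(
    tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15
)
# Run generation in a worker thread so the caller can consume tokens as
# they arrive instead of blocking until the full sequence is finished.
thread = Thread(
    target=model.generate,
    kwargs=dict(
        **inputs,
        max_new_tokens=64,  # illustrative cap
        do_sample=True,
        top_p=0.7,
        temperature=1.0,
        num_beams=1,        # single hypothesis: streamable and memory-light
        streamer=streamer,
    ),
)
thread.start()

generated_text = ""
for new_text in streamer:   # yields decoded chunks as they are produced
    generated_text += new_text
    print(new_text, end="", flush=True)
thread.join()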