Spaces:

TechAudio
/

TeamQwen2AudioInstruct

Sleeping

App Files Community

j committed on Nov 6, 2024

Commit

c1f878d

1 Parent(s): 4bbc7e3

Changed the chatbot message format to use strings instead of dictionaries

Browse files

Files changed (1) hide show

demo/web_demo_audio.py +54 -5

demo/web_demo_audio.py CHANGED Viewed

@@ -135,20 +135,69 @@ def _launch_demo(args):
         task_history = gr.State([])
-        # Update event handlers for new input components
         def process_input(text, audio, chatbot, history):
             content = []
             if audio is not None:
                 content.append({'type': 'audio', 'audio_url': audio})
             if text:
                 content.append({'type': 'text', 'text': text})
             history.append({"role": "user", "content": content})
-            chatbot.append([
-                {"text": text, "audio": audio},
-                None
-            ])
             return "", None, chatbot, history
         submit_btn.click(
             fn=process_input,

         task_history = gr.State([])
         def process_input(text, audio, chatbot, history):
+            """Process input with correct message formatting for Chatbot."""
             content = []
+            message_text = []
             if audio is not None:
                 content.append({'type': 'audio', 'audio_url': audio})
+                message_text.append(f"[Audio file uploaded]")
             if text:
                 content.append({'type': 'text', 'text': text})
+                message_text.append(text)
             history.append({"role": "user", "content": content})
+            # Format message for chatbot as a string instead of dict
+            chatbot.append([" ".join(message_text), None])
             return "", None, chatbot, history
+        def predict(chatbot, task_history):
+            """Generate a response from the model."""
+            print(f"{task_history=}")
+            print(f"{chatbot=}")
+            text = processor.apply_chat_template(task_history, add_generation_prompt=True, tokenize=False)
+            audios = []
+            for message in task_history:
+                if isinstance(message["content"], list):
+                    for ele in message["content"]:
+                        if ele["type"] == "audio":
+                            audios.append(
+                                librosa.load(ele['audio_url'], sr=processor.feature_extractor.sampling_rate)[0]
+                            )
+            if len(audios)==0:
+                audios=None
+            print(f"{text=}")
+            print(f"{audios=}")
+            inputs = processor(text=text, audios=audios, return_tensors="pt", padding=True)
+            inputs["input_ids"] = inputs.input_ids.cuda()
+            generate_ids = model.generate(**inputs, max_length=256)
+            generate_ids = generate_ids[:, inputs.input_ids.size(1):]
+            response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+            print(f"{response=}")
+            task_history.append({'role': 'assistant',
+                                 'content': response})
+            chatbot.append((None, response))
+            return chatbot, task_history
+        # Update event handlers for new input components
+#        def process_input(text, audio, chatbot, history):
+#            content = []
+#            if audio is not None:
+#                content.append({'type': 'audio', 'audio_url': audio})
+#            if text:
+#                content.append({'type': 'text', 'text': text})
+#
+#            history.append({"role": "user", "content": content})
+#            chatbot.append([
+#                {"text": text, "audio": audio},
+#                None
+#            ])
+#            return "", None, chatbot, history
         submit_btn.click(
             fn=process_input,