IliaLarchenko committed
Commit 405ab44
Parent: c468e6a

Removed unnecessary message box

Files changed (3):
  1. api/audio.py +15 -0
  2. ui/coding.py +5 -21
  3. ui/instructions.py +1 -11
api/audio.py CHANGED
@@ -98,6 +98,21 @@ class STTManager:
         transcript = self.transcribe_numpy_array(audio, context=text)
         return text + " " + transcript

+    def transcribe_and_add_to_chat(self, audio: np.ndarray, chat: List[List[Optional[str]]]) -> List[List[Optional[str]]]:
+        """
+        Transcribe audio and add the transcription to the chat history.
+
+        :param audio: Numpy array containing audio data.
+        :param chat: List of chat messages.
+        :return: Updated chat history.
+        """
+        if len(chat) == 0 or chat[-1][0] is None:
+            chat.append(["", None])
+
+        chat[-1][0] = self.transcribe_audio(audio, chat[-1][0])
+
+        return chat
+
     def transcribe_numpy_array(self, audio: np.ndarray, context: Optional[str] = None) -> str:
         """
         Convert speech to text from a full audio segment.
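
For illustration, here is a minimal standalone sketch of the accumulation logic the new method implements. `fake_transcribe` is a hypothetical stand-in for `STTManager.transcribe_audio`, which (per the existing code above) returns the running context followed by the newly recognized text:

from typing import List, Optional

import numpy as np


def fake_transcribe(audio: np.ndarray, context: Optional[str]) -> str:
    # Hypothetical stand-in for STTManager.transcribe_audio, which returns
    # the running context followed by the newly recognized text.
    return ((context or "") + " <chunk>").strip()


def transcribe_and_add_to_chat(audio: np.ndarray, chat: List[List[Optional[str]]]) -> List[List[Optional[str]]]:
    # Same logic as the new STTManager method: open a fresh candidate turn
    # when the chat is empty or the last turn has no pending candidate text,
    # then grow that turn in place with every new transcription pass.
    if len(chat) == 0 or chat[-1][0] is None:
        chat.append(["", None])
    chat[-1][0] = fake_transcribe(audio, chat[-1][0])
    return chat


chat: List[List[Optional[str]]] = [[None, "Hi! Let's begin."]]  # interviewer opens
chunk = np.zeros(1600, dtype=np.int16)
chat = transcribe_and_add_to_chat(chunk, chat)  # opens a candidate turn
chat = transcribe_and_add_to_chat(chunk, chat)  # extends the same turn
print(chat[-1])  # ['<chunk> <chunk>', None]

Because the transcript accumulates directly in the last chat turn, the intermediate message textbox the UI previously needed becomes redundant, which is what the two UI diffs below remove.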
ui/coding.py CHANGED
@@ -198,18 +198,6 @@ def get_problem_solving_ui(llm: LLMManager, tts: TTSManager, stt: STTManager, de
     end_btn = gr.Button("Finish the interview", interactive=False, variant="stop", elem_id=f"end_btn")
     chat = gr.Chatbot(label="Chat", show_label=False, show_share_button=False, elem_id=f"chat")

-    # I need this message box only because chat component is flickering when I am updating it
-    # To be improved in the future
-    message = gr.Textbox(
-        label="Message",
-        show_label=False,
-        lines=5,
-        max_lines=5,
-        interactive=False,
-        container=False,
-        elem_id=f"message",
-    )
-
     audio_input = gr.Audio(interactive=False, **default_audio_params, elem_id=f"audio_input")
     audio_buffer = gr.State(np.array([], dtype=np.int16))
     audio_to_transcribe = gr.State(np.array([], dtype=np.int16))
@@ -263,25 +251,21 @@ def get_problem_solving_ui(llm: LLMManager, tts: TTSManager, stt: STTManager, de
         fn=llm.end_interview, inputs=[description, chat_history, interview_type_select], outputs=[feedback]
     )

+    # TODO: add a counter for audio chunks to use for better delay handling
+    audio_counter = 0
     audio_input.stream(
         stt.process_audio_chunk,
         inputs=[audio_input, audio_buffer],
         outputs=[audio_buffer, audio_to_transcribe],
         show_progress="hidden",
-    ).success(fn=stt.transcribe_audio, inputs=[audio_to_transcribe, message], outputs=[message], show_progress="hidden")
+    ).success(fn=stt.transcribe_and_add_to_chat, inputs=[audio_to_transcribe, chat], outputs=[chat], show_progress="hidden")

-    # TODO: find a way to remove delay
+    # TODO: find a way to remove a delay
     audio_input.stop_recording(fn=lambda: time.sleep(2)).success(
-        fn=add_candidate_message, inputs=[message, chat], outputs=[chat]
-    ).success(
         fn=send_request_partial,
         inputs=[code, previous_code, chat_history, chat],
         outputs=[chat_history, chat, previous_code, audio_output],
-    ).success(
-        fn=lambda: np.array([], dtype=np.int16), outputs=[audio_buffer]
-    ).success(
-        lambda: "", outputs=[message]
-    )
+    ).success(fn=lambda: np.array([], dtype=np.int16), outputs=[audio_buffer])

     interview_type_select.change(
         fn=lambda x: gr.update(choices=topic_lists[x], value=np.random.choice(topic_lists[x])),
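
A minimal sketch of the reworked event chain, assuming a Gradio 4.x-style API where `gr.Audio(streaming=True)` delivers `(sample_rate, numpy_array)` chunks; `accumulate`, `transcribe_chunk`, and `reply` are hypothetical stand-ins for `stt.process_audio_chunk`, `stt.transcribe_and_add_to_chat`, and `send_request_partial`:

import time

import gradio as gr
import numpy as np


def accumulate(chunk, buffer):
    # Stand-in for stt.process_audio_chunk: grow the rolling buffer and
    # pass the accumulated samples on to the transcription step.
    sr, data = chunk
    buffer = np.concatenate([buffer, data.astype(np.int16).flatten()])
    return buffer, buffer


def transcribe_chunk(audio, chat):
    # Stand-in for stt.transcribe_and_add_to_chat: write the running
    # transcript straight into the last chat turn, no extra textbox.
    chat = [list(turn) for turn in chat]
    if not chat or chat[-1][0] is None:
        chat.append(["", None])
    chat[-1][0] = f"{len(audio)} samples recognized so far"
    return chat


def reply(chat):
    # Stand-in for send_request_partial: answer the finished turn.
    chat = [list(turn) for turn in chat]
    chat[-1][1] = "Interviewer reply"
    return chat


with gr.Blocks() as demo:
    chat = gr.Chatbot(show_label=False)
    audio_input = gr.Audio(sources=["microphone"], streaming=True)
    audio_buffer = gr.State(np.array([], dtype=np.int16))
    audio_to_transcribe = gr.State(np.array([], dtype=np.int16))

    # While recording: buffer each chunk, then refresh the chat in place.
    audio_input.stream(
        accumulate,
        inputs=[audio_input, audio_buffer],
        outputs=[audio_buffer, audio_to_transcribe],
        show_progress="hidden",
    ).success(fn=transcribe_chunk, inputs=[audio_to_transcribe, chat], outputs=[chat], show_progress="hidden")

    # On stop: wait out trailing chunks, send the turn, reset the buffer.
    audio_input.stop_recording(fn=lambda: time.sleep(2)).success(
        fn=reply, inputs=[chat], outputs=[chat]
    ).success(fn=lambda: np.array([], dtype=np.int16), outputs=[audio_buffer])

demo.launch()

Note that the stop_recording chain no longer needs an add_candidate_message step or a textbox reset: the transcript already lives in the chat turn, so only the audio buffer has to be cleared.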
ui/instructions.py CHANGED
@@ -32,8 +32,7 @@ The AI will present a problem after you initiate the session.
 This section is where the interaction happens:
 - **Code/Solution Area**: On the left side, you will find a space to write your solution. For codding problem you can use any language, although syntax highlighting is only available for Python and SQL.
 - **Communication Area**: On the right, this area includes:
-  - **Chat History**: Displays the entire dialogue history, showing messages from both you and the AI interviewer.
-  - **New message text box**: Your recognized speech will be shown in this field before sending it to the AI. It is more like a legacy text input field that will be removed in the future.
+  - **Chat History**: Displays the entire dialogue history, showing messages from both you and the AI interviewer. Your recognized speech will be shown here before being sent to the AI.
 - **Audio Record Button**: Use this button to record your responses. Press to start recording, speak your thoughts, and press stop to send your audio. Your message will be sent to the chat, along with a snapshot of your code or any notes from solution text area."

 Engage with the AI as you would with a real interviewer. Provide concise responses and frequent updates rather than long monologues. Your interactions, including any commentary on your code, will be recorded and the AI's responses will be read aloud and displayed in the chat. Follow the AI's instructions and respond to any follow-up questions as they arise.
@@ -69,15 +68,6 @@ def get_instructions_ui(llm, tts, stt, default_audio_params):
     chat_example = gr.Chatbot(
         label="Chat", show_label=False, show_share_button=False, value=[["Candidate message", "Interviewer message"]]
     )
-    message_example = gr.Textbox(
-        label="Message",
-        show_label=False,
-        placeholder="Your recognized speech will be shown here",
-        lines=5,
-        max_lines=5,
-        interactive=False,
-        container=False,
-    )
     audio_input_example = gr.Audio(interactive=True, **default_audio_params)

     return instruction_tab
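
With the textbox example gone, the communication column of the instructions tab reduces to the chat preview plus the microphone widget. A minimal sketch of what remains, where `default_audio_params` is an assumed stand-in for the app's shared microphone settings:

import gradio as gr

# Assumption: the app's shared microphone settings look roughly like this.
default_audio_params = {"sources": ["microphone"], "streaming": True}

with gr.Blocks() as instruction_tab:
    # The recognized-speech textbox is gone; speech now lands in the chat.
    chat_example = gr.Chatbot(
        label="Chat", show_label=False, show_share_button=False, value=[["Candidate message", "Interviewer message"]]
    )
    audio_input_example = gr.Audio(interactive=True, **default_audio_params)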