input file handling edit
Browse files
app.py
CHANGED
@@ -10,8 +10,11 @@ asr = pipeline(
|
|
10 |
)
|
11 |
pipeline1 = Pipeline.from_pretrained("pyannote/speaker-segmentation")
|
12 |
|
13 |
-
def diarization(file_input,
|
14 |
-
|
|
|
|
|
|
|
15 |
|
16 |
speaker_output = pipeline1(audio)
|
17 |
text_output = asr(audio,return_timestamps="word")
|
@@ -37,13 +40,13 @@ def diarization(file_input,microphone_input,selection):
|
|
37 |
title = "Speech Recognition with Speaker Diarization"
|
38 |
description = "Speaker Diarization is the act of attributing parts of the audio recording to different speakers. This space aims to distinguish the speakers and apply speech-to-text from a given input audio file. Pre-trained models from Pyannote[1] for the Speaker Diarization and [2]."
|
39 |
article = "<p style='text-align: center'><a href='https://github.com/pyannote/pyannote-audio' target='_blank'>[1] Pyannote - Speaker Diarization model</a></p>"
|
40 |
-
inputs = [gr.inputs.Audio(source="upload", type="filepath", label="Upload your audio file here:"),
|
41 |
-
gr.inputs.Audio(source="microphone", type="
|
42 |
-
gr.inputs.Radio(["Upload","Microphone"],type="value",label="Select which input:")]
|
43 |
outputs = [gr.outputs.Textbox(type="auto", label="Diarized Output"),
|
44 |
gr.outputs.Textbox(type="auto",label="Full ASR Text for comparison")]
|
45 |
-
examples = [["test_audio1.wav",
|
46 |
-
["test_audio2.wav",
|
47 |
|
48 |
app = gr.Interface(fn=diarization,
|
49 |
inputs=inputs,
|
|
|
10 |
)
|
11 |
pipeline1 = Pipeline.from_pretrained("pyannote/speaker-segmentation")
|
12 |
|
13 |
+
def diarization(file_input,mic_input,selection):
|
14 |
+
mic_path = None if mic_input is None else mic_input.name
|
15 |
+
audio = file_input if selection == "Upload" else mic_path
|
16 |
+
if audio is None:
|
17 |
+
return "Please check your inputs!", ""
|
18 |
|
19 |
speaker_output = pipeline1(audio)
|
20 |
text_output = asr(audio,return_timestamps="word")
|
|
|
40 |
title = "Speech Recognition with Speaker Diarization"
|
41 |
description = "Speaker Diarization is the act of attributing parts of the audio recording to different speakers. This space aims to distinguish the speakers and apply speech-to-text from a given input audio file. Pre-trained models from Pyannote[1] for the Speaker Diarization and [2]."
|
42 |
article = "<p style='text-align: center'><a href='https://github.com/pyannote/pyannote-audio' target='_blank'>[1] Pyannote - Speaker Diarization model</a></p>"
|
43 |
+
inputs = [gr.inputs.Audio(source="upload", type="filepath", label="Upload your audio file here:", optional=True),
|
44 |
+
gr.inputs.Audio(source="microphone", type="file",label="Or use your Microphone:", optional=True),
|
45 |
+
gr.inputs.Radio(["Upload","Microphone"], type="value", label="Select which input:")]
|
46 |
outputs = [gr.outputs.Textbox(type="auto", label="Diarized Output"),
|
47 |
gr.outputs.Textbox(type="auto",label="Full ASR Text for comparison")]
|
48 |
+
examples = [["test_audio1.wav",None,"Upload"],
|
49 |
+
["test_audio2.wav",None,"Upload"]]
|
50 |
|
51 |
app = gr.Interface(fn=diarization,
|
52 |
inputs=inputs,
|