input handling edit
Browse files
app.py
CHANGED
@@ -8,10 +8,12 @@ asr = pipeline(
|
|
8 |
feature_extractor="facebook/wav2vec2-large-960h-lv60-self",
|
9 |
|
10 |
)
|
11 |
-
|
12 |
|
13 |
-
def diarization(
|
14 |
-
|
|
|
|
|
15 |
text_output = asr(audio,return_timestamps="word")
|
16 |
|
17 |
full_text = text_output['text'].lower()
|
@@ -35,9 +37,13 @@ def diarization(audio):
|
|
35 |
title = "Speech Recognition with Speaker Diarization"
|
36 |
description = "Speaker Diarization is the act of attributing parts of the audio recording to different speakers. This space aims to distinguish the speakers and apply speech-to-text from a given input audio file. Pre-trained models from Pyannote[1] for the Speaker Diarization and [2]."
|
37 |
article = "<p style='text-align: center'><a href='https://github.com/pyannote/pyannote-audio' target='_blank'>[1] Pyannote - Speaker Diarization model</a></p>"
|
38 |
-
inputs = gr.inputs.Audio(source="upload", type="filepath", label="Upload your audio file here:")
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
41 |
|
42 |
app = gr.Interface(fn=diarization,
|
43 |
inputs=inputs,
|
|
|
8 |
feature_extractor="facebook/wav2vec2-large-960h-lv60-self",
|
9 |
|
10 |
)
|
11 |
+
pipeline1 = Pipeline.from_pretrained("pyannote/speaker-segmentation")
|
12 |
|
13 |
+
def diarization(file_input,microphone_input,selection):
|
14 |
+
audio = file_input if str(selection) == "Upload" else Path(microphone_input)
|
15 |
+
|
16 |
+
speaker_output = pipeline1(audio)
|
17 |
text_output = asr(audio,return_timestamps="word")
|
18 |
|
19 |
full_text = text_output['text'].lower()
|
|
|
37 |
title = "Speech Recognition with Speaker Diarization"
|
38 |
description = "Speaker Diarization is the act of attributing parts of the audio recording to different speakers. This space aims to distinguish the speakers and apply speech-to-text from a given input audio file. Pre-trained models from Pyannote[1] for the Speaker Diarization and [2]."
|
39 |
article = "<p style='text-align: center'><a href='https://github.com/pyannote/pyannote-audio' target='_blank'>[1] Pyannote - Speaker Diarization model</a></p>"
|
40 |
+
inputs = [gr.inputs.Audio(source="upload", type="filepath", label="Upload your audio file here:"),
|
41 |
+
gr.inputs.Audio(source="microphone", type="filepath",label="Or use your Microphone:"),
|
42 |
+
gr.inputs.Radio(["Upload","Microphone"],type="value",label="Select which input:")]
|
43 |
+
outputs = [gr.outputs.Textbox(type="auto", label="Diarized Output"),
|
44 |
+
gr.outputs.Textbox(type="auto",label="Full ASR Text for comparison")]
|
45 |
+
examples = [["test_audio1.wav","test_audio1.wav","Upload"],
|
46 |
+
["test_audio2.wav","test_audio2.wav","Upload"]]
|
47 |
|
48 |
app = gr.Interface(fn=diarization,
|
49 |
inputs=inputs,
|