Cahlil committed on
Commit
8983ff3
·
1 Parent(s): d78cd77

input handling edit

Browse files
Files changed (1) hide show
  1. app.py +12 -6
app.py CHANGED
@@ -8,10 +8,12 @@ asr = pipeline(
8
  feature_extractor="facebook/wav2vec2-large-960h-lv60-self",
9
 
10
  )
11
- speaker_diarization = Pipeline.from_pretrained("pyannote/speaker-diarization")
12
 
13
- def diarization(audio):
14
- speaker_output = speaker_diarization(audio)
 
 
15
  text_output = asr(audio,return_timestamps="word")
16
 
17
  full_text = text_output['text'].lower()
@@ -35,9 +37,13 @@ def diarization(audio):
35
  title = "Speech Recognition with Speaker Diarization"
36
  description = "Speaker Diarization is the act of attributing parts of the audio recording to different speakers. This space aims to distinguish the speakers and apply speech-to-text from a given input audio file. Pre-trained models from Pyannote[1] for the Speaker Diarization and [2]."
37
  article = "<p style='text-align: center'><a href='https://github.com/pyannote/pyannote-audio' target='_blank'>[1] Pyannote - Speaker Diarization model</a></p>"
38
- inputs = gr.inputs.Audio(source="upload", type="filepath", label="Upload your audio file here:")
39
- outputs = [gr.outputs.Textbox(type="auto", label="Diarized Output"),gr.outputs.Textbox(type="auto",label="Full ASR Text for comparison")]
40
- examples = [["test_audio1.wav"]]
 
 
 
 
41
 
42
  app = gr.Interface(fn=diarization,
43
  inputs=inputs,
 
8
  feature_extractor="facebook/wav2vec2-large-960h-lv60-self",
9
 
10
  )
11
+ pipeline1 = Pipeline.from_pretrained("pyannote/speaker-segmentation")
12
 
13
+ def diarization(file_input,microphone_input,selection):
14
+ audio = file_input if str(selection) == "Upload" else Path(microphone_input)
15
+
16
+ speaker_output = pipeline1(audio)
17
  text_output = asr(audio,return_timestamps="word")
18
 
19
  full_text = text_output['text'].lower()
 
37
  title = "Speech Recognition with Speaker Diarization"
38
  description = "Speaker Diarization is the act of attributing parts of the audio recording to different speakers. This space aims to distinguish the speakers and apply speech-to-text from a given input audio file. Pre-trained models from Pyannote[1] for the Speaker Diarization and [2]."
39
  article = "<p style='text-align: center'><a href='https://github.com/pyannote/pyannote-audio' target='_blank'>[1] Pyannote - Speaker Diarization model</a></p>"
40
+ inputs = [gr.inputs.Audio(source="upload", type="filepath", label="Upload your audio file here:"),
41
+ gr.inputs.Audio(source="microphone", type="filepath",label="Or use your Microphone:"),
42
+ gr.inputs.Radio(["Upload","Microphone"],type="value",label="Select which input:")]
43
+ outputs = [gr.outputs.Textbox(type="auto", label="Diarized Output"),
44
+ gr.outputs.Textbox(type="auto",label="Full ASR Text for comparison")]
45
+ examples = [["test_audio1.wav","test_audio1.wav","Upload"],
46
+ ["test_audio2.wav","test_audio2.wav","Upload"]]
47
 
48
  app = gr.Interface(fn=diarization,
49
  inputs=inputs,