nata0801 committed on
Commit
9726882
1 Parent(s): a299284

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -9
app.py CHANGED
@@ -21,18 +21,20 @@ def asr_transcript(audio_file, language):
21
  model = Wav2Vec2ForCTC.from_pretrained(model)
22
 
23
  #read the file and resample to 16KHz
24
- stream = librosa.stream(audio_file.name, block_length=20, frame_length=16000, hop_length=16000)
 
 
 
25
 
26
- for speech in stream:
27
- if len(speech.shape) > 1:
28
- speech = speech[:, 0] + speech[:, 1]
29
 
30
- input_values = tokenizer(speech, return_tensors="pt").input_values
31
- logits = model(input_values).logits
32
 
33
- predicted_ids = torch.argmax(logits, dim=-1)
34
- transcription = tokenizer.batch_decode(predicted_ids)[0]
35
- transcript += transcription.lower() + " "
36
 
37
  return transcript
38
 
 
21
  model = Wav2Vec2ForCTC.from_pretrained(model)
22
 
23
  #read the file and resample to 16KHz
24
+ #stream = librosa.stream(audio_file.name, block_length=20, frame_length=16000, hop_length=16000)
25
+
26
+ #read the file
27
+ speech, sample_rate = librosa.load(input_file, 16000)
28
 
29
+ if len(speech.shape) > 1:
30
+ speech = speech[:, 0] + speech[:, 1]
 
31
 
32
+ input_values = tokenizer(speech, return_tensors="pt").input_values
33
+ logits = model(input_values).logits
34
 
35
+ predicted_ids = torch.argmax(logits, dim=-1)
36
+ transcription = tokenizer.batch_decode(predicted_ids)[0]
37
+ transcript = transcription.lower()
38
 
39
  return transcript
40